KVM: SVM: Add NMI support for an SEV-ES guest
arch/x86/kvm/svm/sev.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Kernel-based Virtual Machine driver for Linux
4  *
5  * AMD SVM-SEV support
6  *
7  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
8  */
9
10 #include <linux/kvm_types.h>
11 #include <linux/kvm_host.h>
12 #include <linux/kernel.h>
13 #include <linux/highmem.h>
14 #include <linux/psp-sev.h>
15 #include <linux/pagemap.h>
16 #include <linux/swap.h>
17 #include <linux/processor.h>
18 #include <linux/trace_events.h>
19
20 #include "x86.h"
21 #include "svm.h"
22 #include "cpuid.h"
23 #include "trace.h"
24
25 static u8 sev_enc_bit;
26 static int sev_flush_asids(void);
27 static DECLARE_RWSEM(sev_deactivate_lock);
28 static DEFINE_MUTEX(sev_bitmap_lock);
29 unsigned int max_sev_asid;
30 static unsigned int min_sev_asid;
31 static unsigned long *sev_asid_bitmap;
32 static unsigned long *sev_reclaim_asid_bitmap;
33 #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
34
35 struct enc_region {
36         struct list_head list;
37         unsigned long npages;
38         struct page **pages;
39         unsigned long uaddr;
40         unsigned long size;
41 };
42
43 static int sev_flush_asids(void)
44 {
45         int ret, error = 0;
46
47         /*
48          * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
49          * so it must be guarded.
50          */
51         down_write(&sev_deactivate_lock);
52
53         wbinvd_on_all_cpus();
54         ret = sev_guest_df_flush(&error);
55
56         up_write(&sev_deactivate_lock);
57
58         if (ret)
59                 pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);
60
61         return ret;
62 }
63
64 /* Must be called with the sev_bitmap_lock held */
65 static bool __sev_recycle_asids(void)
66 {
67         int pos;
68
69         /* Check if there are any ASIDs to reclaim before performing a flush */
70         pos = find_next_bit(sev_reclaim_asid_bitmap,
71                             max_sev_asid, min_sev_asid - 1);
72         if (pos >= max_sev_asid)
73                 return false;
74
75         if (sev_flush_asids())
76                 return false;
77
78         bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
79                    max_sev_asid);
80         bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
81
82         return true;
83 }
84
85 static int sev_asid_new(void)
86 {
87         bool retry = true;
88         int pos;
89
90         mutex_lock(&sev_bitmap_lock);
91
92         /*
93          * An SEV-enabled guest must use an ASID from min_sev_asid to max_sev_asid.
94          */
95 again:
96         pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
97         if (pos >= max_sev_asid) {
98                 if (retry && __sev_recycle_asids()) {
99                         retry = false;
100                         goto again;
101                 }
102                 mutex_unlock(&sev_bitmap_lock);
103                 return -EBUSY;
104         }
105
106         __set_bit(pos, sev_asid_bitmap);
107
108         mutex_unlock(&sev_bitmap_lock);
109
110         return pos + 1;
111 }
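
The two bitmaps track ASID n at bit n - 1 (bit 0 corresponds to ASID 1), which is why sev_asid_new() returns pos + 1 and sev_asid_free() below marks bit asid - 1. A minimal sketch of that mapping, for illustration only (these helpers are not part of the file):

/* Illustration only: the bitmap-bit <-> ASID convention used above. */
static inline int sev_asid_to_bit(int asid) { return asid - 1; }
static inline int sev_bit_to_asid(int bit)  { return bit + 1; }
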
112
113 static int sev_get_asid(struct kvm *kvm)
114 {
115         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
116
117         return sev->asid;
118 }
119
120 static void sev_asid_free(int asid)
121 {
122         struct svm_cpu_data *sd;
123         int cpu, pos;
124
125         mutex_lock(&sev_bitmap_lock);
126
127         pos = asid - 1;
128         __set_bit(pos, sev_reclaim_asid_bitmap);
129
130         for_each_possible_cpu(cpu) {
131                 sd = per_cpu(svm_data, cpu);
132                 sd->sev_vmcbs[pos] = NULL;
133         }
134
135         mutex_unlock(&sev_bitmap_lock);
136 }
137
138 static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
139 {
140         struct sev_data_decommission *decommission;
141         struct sev_data_deactivate *data;
142
143         if (!handle)
144                 return;
145
146         data = kzalloc(sizeof(*data), GFP_KERNEL);
147         if (!data)
148                 return;
149
150         /* deactivate handle */
151         data->handle = handle;
152
153         /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
154         down_read(&sev_deactivate_lock);
155         sev_guest_deactivate(data, NULL);
156         up_read(&sev_deactivate_lock);
157
158         kfree(data);
159
160         decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
161         if (!decommission)
162                 return;
163
164         /* decommission handle */
165         decommission->handle = handle;
166         sev_guest_decommission(decommission, NULL);
167
168         kfree(decommission);
169 }
170
171 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
172 {
173         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
174         int asid, ret;
175
176         ret = -EBUSY;
177         if (unlikely(sev->active))
178                 return ret;
179
180         asid = sev_asid_new();
181         if (asid < 0)
182                 return ret;
183
184         ret = sev_platform_init(&argp->error);
185         if (ret)
186                 goto e_free;
187
188         sev->active = true;
189         sev->asid = asid;
190         INIT_LIST_HEAD(&sev->regions_list);
191
192         return 0;
193
194 e_free:
195         sev_asid_free(asid);
196         return ret;
197 }
198
199 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
200 {
201         struct sev_data_activate *data;
202         int asid = sev_get_asid(kvm);
203         int ret;
204
205         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
206         if (!data)
207                 return -ENOMEM;
208
209         /* activate ASID on the given handle */
210         data->handle = handle;
211         data->asid   = asid;
212         ret = sev_guest_activate(data, error);
213         kfree(data);
214
215         return ret;
216 }
217
218 static int __sev_issue_cmd(int fd, int id, void *data, int *error)
219 {
220         struct fd f;
221         int ret;
222
223         f = fdget(fd);
224         if (!f.file)
225                 return -EBADF;
226
227         ret = sev_issue_cmd_external_user(f.file, id, data, error);
228
229         fdput(f);
230         return ret;
231 }
232
233 static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
234 {
235         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
236
237         return __sev_issue_cmd(sev->fd, id, data, error);
238 }
239
240 static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
241 {
242         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
243         struct sev_data_launch_start *start;
244         struct kvm_sev_launch_start params;
245         void *dh_blob, *session_blob;
246         int *error = &argp->error;
247         int ret;
248
249         if (!sev_guest(kvm))
250                 return -ENOTTY;
251
252         if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
253                 return -EFAULT;
254
255         start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
256         if (!start)
257                 return -ENOMEM;
258
259         dh_blob = NULL;
260         if (params.dh_uaddr) {
261                 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
262                 if (IS_ERR(dh_blob)) {
263                         ret = PTR_ERR(dh_blob);
264                         goto e_free;
265                 }
266
267                 start->dh_cert_address = __sme_set(__pa(dh_blob));
268                 start->dh_cert_len = params.dh_len;
269         }
270
271         session_blob = NULL;
272         if (params.session_uaddr) {
273                 session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
274                 if (IS_ERR(session_blob)) {
275                         ret = PTR_ERR(session_blob);
276                         goto e_free_dh;
277                 }
278
279                 start->session_address = __sme_set(__pa(session_blob));
280                 start->session_len = params.session_len;
281         }
282
283         start->handle = params.handle;
284         start->policy = params.policy;
285
286         /* create memory encryption context */
287         ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
288         if (ret)
289                 goto e_free_session;
290
291         /* Bind ASID to this guest */
292         ret = sev_bind_asid(kvm, start->handle, error);
293         if (ret)
294                 goto e_free_session;
295
296         /* return handle to userspace */
297         params.handle = start->handle;
298         if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
299                 sev_unbind_asid(kvm, start->handle);
300                 ret = -EFAULT;
301                 goto e_free_session;
302         }
303
304         sev->handle = start->handle;
305         sev->fd = argp->sev_fd;
306
307 e_free_session:
308         kfree(session_blob);
309 e_free_dh:
310         kfree(dh_blob);
311 e_free:
312         kfree(start);
313         return ret;
314 }
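
For context, a hedged userspace sketch of driving LAUNCH_START through the KVM_MEMORY_ENCRYPT_OP ioctl; struct kvm_sev_launch_start and struct kvm_sev_cmd come from the KVM UAPI, while vm_fd, sev_fd and the policy value are placeholders chosen for illustration:

/* Sketch only: ask KVM to create the SEV memory encryption context. */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int sev_launch_start_example(int vm_fd, int sev_fd, unsigned int policy)
{
	struct kvm_sev_launch_start start;
	struct kvm_sev_cmd cmd;

	memset(&start, 0, sizeof(start));
	start.policy = policy;                  /* guest policy, e.g. 0 */

	memset(&cmd, 0, sizeof(cmd));
	cmd.id     = KVM_SEV_LAUNCH_START;
	cmd.data   = (unsigned long)&start;     /* params copied back, incl. start.handle */
	cmd.sev_fd = sev_fd;                    /* open file descriptor of /dev/sev */

	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}
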
315
316 static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
317                                     unsigned long ulen, unsigned long *n,
318                                     int write)
319 {
320         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
321         unsigned long npages, size;
322         int npinned;
323         unsigned long locked, lock_limit;
324         struct page **pages;
325         unsigned long first, last;
326         int ret;
327
328         if (ulen == 0 || uaddr + ulen < uaddr)
329                 return ERR_PTR(-EINVAL);
330
331         /* Calculate number of pages. */
332         first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
333         last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
334         npages = (last - first + 1);
335
336         locked = sev->pages_locked + npages;
337         lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
338         if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
339                 pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
340                 return ERR_PTR(-ENOMEM);
341         }
342
343         if (WARN_ON_ONCE(npages > INT_MAX))
344                 return ERR_PTR(-EINVAL);
345
346         /* Avoid using vmalloc for smaller buffers. */
347         size = npages * sizeof(struct page *);
348         if (size > PAGE_SIZE)
349                 pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
350         else
351                 pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
352
353         if (!pages)
354                 return ERR_PTR(-ENOMEM);
355
356         /* Pin the user virtual address. */
357         npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
358         if (npinned != npages) {
359                 pr_err("SEV: Failure locking %lu pages.\n", npages);
360                 ret = -ENOMEM;
361                 goto err;
362         }
363
364         *n = npages;
365         sev->pages_locked = locked;
366
367         return pages;
368
369 err:
370         if (npinned > 0)
371                 unpin_user_pages(pages, npinned);
372
373         kvfree(pages);
374         return ERR_PTR(ret);
375 }
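
A worked example of the page-count arithmetic above, assuming 4 KiB pages:

/*
 * uaddr = 0x1fff, ulen = 2 touches two pages:
 *   first  = (0x1fff & PAGE_MASK) >> PAGE_SHIFT           = 1
 *   last   = ((0x1fff + 2 - 1) & PAGE_MASK) >> PAGE_SHIFT = 2
 *   npages = last - first + 1                             = 2
 */
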
376
377 static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
378                              unsigned long npages)
379 {
380         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
381
382         unpin_user_pages(pages, npages);
383         kvfree(pages);
384         sev->pages_locked -= npages;
385 }
386
387 static void sev_clflush_pages(struct page *pages[], unsigned long npages)
388 {
389         uint8_t *page_virtual;
390         unsigned long i;
391
392         if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
393             pages == NULL)
394                 return;
395
396         for (i = 0; i < npages; i++) {
397                 page_virtual = kmap_atomic(pages[i]);
398                 clflush_cache_range(page_virtual, PAGE_SIZE);
399                 kunmap_atomic(page_virtual);
400         }
401 }
402
403 static unsigned long get_num_contig_pages(unsigned long idx,
404                                 struct page **inpages, unsigned long npages)
405 {
406         unsigned long paddr, next_paddr;
407         unsigned long i = idx + 1, pages = 1;
408
409         /* find the number of contiguous pages starting from idx */
410         paddr = __sme_page_pa(inpages[idx]);
411         while (i < npages) {
412                 next_paddr = __sme_page_pa(inpages[i++]);
413                 if ((paddr + PAGE_SIZE) == next_paddr) {
414                         pages++;
415                         paddr = next_paddr;
416                         continue;
417                 }
418                 break;
419         }
420
421         return pages;
422 }
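
An illustration of the walk above:

/*
 * If the pinned pages have PFNs 5, 6, 7 and 9:
 *   get_num_contig_pages(0, inpages, 4) == 3   (PFNs 5..7 are adjacent)
 *   get_num_contig_pages(3, inpages, 4) == 1   (PFN 9 stands alone)
 */
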
423
424 static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
425 {
426         unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
427         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
428         struct kvm_sev_launch_update_data params;
429         struct sev_data_launch_update_data *data;
430         struct page **inpages;
431         int ret;
432
433         if (!sev_guest(kvm))
434                 return -ENOTTY;
435
436         if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
437                 return -EFAULT;
438
439         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
440         if (!data)
441                 return -ENOMEM;
442
443         vaddr = params.uaddr;
444         size = params.len;
445         vaddr_end = vaddr + size;
446
447         /* Lock the user memory. */
448         inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
449         if (IS_ERR(inpages)) {
450                 ret = PTR_ERR(inpages);
451                 goto e_free;
452         }
453
454         /*
455          * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
456          * place; the cache may contain the data that was written unencrypted.
457          */
458         sev_clflush_pages(inpages, npages);
459
460         for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
461                 int offset, len;
462
463                 /*
464                  * If the user buffer is not page-aligned, calculate the offset
465                  * within the page.
466                  */
467                 offset = vaddr & (PAGE_SIZE - 1);
468
469                 /* Calculate the number of pages that can be encrypted in one go. */
470                 pages = get_num_contig_pages(i, inpages, npages);
471
472                 len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
473
474                 data->handle = sev->handle;
475                 data->len = len;
476                 data->address = __sme_page_pa(inpages[i]) + offset;
477                 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
478                 if (ret)
479                         goto e_unpin;
480
481                 size -= len;
482                 next_vaddr = vaddr + len;
483         }
484
485 e_unpin:
486         /* content of memory is updated, mark pages dirty */
487         for (i = 0; i < npages; i++) {
488                 set_page_dirty_lock(inpages[i]);
489                 mark_page_accessed(inpages[i]);
490         }
491         /* unlock the user pages */
492         sev_unpin_memory(kvm, inpages, npages);
493 e_free:
494         kfree(data);
495         return ret;
496 }
497
498 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
499 {
500         void __user *measure = (void __user *)(uintptr_t)argp->data;
501         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
502         struct sev_data_launch_measure *data;
503         struct kvm_sev_launch_measure params;
504         void __user *p = NULL;
505         void *blob = NULL;
506         int ret;
507
508         if (!sev_guest(kvm))
509                 return -ENOTTY;
510
511         if (copy_from_user(&params, measure, sizeof(params)))
512                 return -EFAULT;
513
514         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
515         if (!data)
516                 return -ENOMEM;
517
518         /* User wants to query the blob length */
519         if (!params.len)
520                 goto cmd;
521
522         p = (void __user *)(uintptr_t)params.uaddr;
523         if (p) {
524                 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
525                         ret = -EINVAL;
526                         goto e_free;
527                 }
528
529                 ret = -ENOMEM;
530                 blob = kmalloc(params.len, GFP_KERNEL);
531                 if (!blob)
532                         goto e_free;
533
534                 data->address = __psp_pa(blob);
535                 data->len = params.len;
536         }
537
538 cmd:
539         data->handle = sev->handle;
540         ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
541
542         /*
543          * If we only queried the measurement length, FW has already responded with the required size.
544          */
545         if (!params.len)
546                 goto done;
547
548         if (ret)
549                 goto e_free_blob;
550
551         if (blob) {
552                 if (copy_to_user(p, blob, params.len))
553                         ret = -EFAULT;
554         }
555
556 done:
557         params.len = data->len;
558         if (copy_to_user(measure, &params, sizeof(params)))
559                 ret = -EFAULT;
560 e_free_blob:
561         kfree(blob);
562 e_free:
563         kfree(data);
564         return ret;
565 }
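
Userspace normally calls LAUNCH_MEASURE twice: first with len = 0 so the firmware reports the required size, then with a real buffer. A hedged sketch; vm_fd and sev_fd are assumed to be set up as in the earlier example, and error handling is elided:

/* Sketch only: two-step measurement retrieval. */
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_measurement(int vm_fd, int sev_fd, void **blob, __u32 *blob_len)
{
	struct kvm_sev_launch_measure m = { .uaddr = 0, .len = 0 };
	struct kvm_sev_cmd cmd = {
		.id     = KVM_SEV_LAUNCH_MEASURE,
		.data   = (unsigned long)&m,
		.sev_fd = sev_fd,
	};

	/* First call reports an error but updates m.len with the required size. */
	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);

	*blob = malloc(m.len);
	m.uaddr = (unsigned long)*blob;

	/* Second call fills the measurement blob. */
	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);

	*blob_len = m.len;
	return cmd.error;       /* 0 if the firmware reported success */
}
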
566
567 static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
568 {
569         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
570         struct sev_data_launch_finish *data;
571         int ret;
572
573         if (!sev_guest(kvm))
574                 return -ENOTTY;
575
576         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
577         if (!data)
578                 return -ENOMEM;
579
580         data->handle = sev->handle;
581         ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
582
583         kfree(data);
584         return ret;
585 }
586
587 static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
588 {
589         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
590         struct kvm_sev_guest_status params;
591         struct sev_data_guest_status *data;
592         int ret;
593
594         if (!sev_guest(kvm))
595                 return -ENOTTY;
596
597         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
598         if (!data)
599                 return -ENOMEM;
600
601         data->handle = sev->handle;
602         ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
603         if (ret)
604                 goto e_free;
605
606         params.policy = data->policy;
607         params.state = data->state;
608         params.handle = data->handle;
609
610         if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
611                 ret = -EFAULT;
612 e_free:
613         kfree(data);
614         return ret;
615 }
616
617 static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
618                                unsigned long dst, int size,
619                                int *error, bool enc)
620 {
621         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
622         struct sev_data_dbg *data;
623         int ret;
624
625         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
626         if (!data)
627                 return -ENOMEM;
628
629         data->handle = sev->handle;
630         data->dst_addr = dst;
631         data->src_addr = src;
632         data->len = size;
633
634         ret = sev_issue_cmd(kvm,
635                             enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
636                             data, error);
637         kfree(data);
638         return ret;
639 }
640
641 static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
642                              unsigned long dst_paddr, int sz, int *err)
643 {
644         int offset;
645
646         /*
647          * It's safe to read more than was asked for; the caller must ensure
648          * that the destination has enough space.
649          */
650         offset = src_paddr & 15;
651         src_paddr = round_down(src_paddr, 16);
652         sz = round_up(sz + offset, 16);
653
654         return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
655 }
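
The rounding above widens the request to whole 16-byte blocks so the address and length meet the firmware's alignment requirement; a worked example:

/*
 * src_paddr = 0x1009, sz = 20:
 *   offset    = 0x1009 & 15            = 9
 *   src_paddr = round_down(0x1009, 16) = 0x1000
 *   sz        = round_up(20 + 9, 16)   = 32
 * The firmware thus decrypts 0x1000..0x101f, and the caller's bytes start
 * at offset 9 within the destination buffer.
 */
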
656
657 static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
658                                   unsigned long __user dst_uaddr,
659                                   unsigned long dst_paddr,
660                                   int size, int *err)
661 {
662         struct page *tpage = NULL;
663         int ret, offset;
664
665         /* If the inputs are not 16-byte aligned then use an intermediate buffer */
666         if (!IS_ALIGNED(dst_paddr, 16) ||
667             !IS_ALIGNED(paddr,     16) ||
668             !IS_ALIGNED(size,      16)) {
669                 tpage = (void *)alloc_page(GFP_KERNEL);
670                 if (!tpage)
671                         return -ENOMEM;
672
673                 dst_paddr = __sme_page_pa(tpage);
674         }
675
676         ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
677         if (ret)
678                 goto e_free;
679
680         if (tpage) {
681                 offset = paddr & 15;
682                 if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
683                                  page_address(tpage) + offset, size))
684                         ret = -EFAULT;
685         }
686
687 e_free:
688         if (tpage)
689                 __free_page(tpage);
690
691         return ret;
692 }
693
694 static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
695                                   unsigned long __user vaddr,
696                                   unsigned long dst_paddr,
697                                   unsigned long __user dst_vaddr,
698                                   int size, int *error)
699 {
700         struct page *src_tpage = NULL;
701         struct page *dst_tpage = NULL;
702         int ret, len = size;
703
704         /* If source buffer is not aligned then use an intermediate buffer */
705         if (!IS_ALIGNED(vaddr, 16)) {
706                 src_tpage = alloc_page(GFP_KERNEL);
707                 if (!src_tpage)
708                         return -ENOMEM;
709
710                 if (copy_from_user(page_address(src_tpage),
711                                 (void __user *)(uintptr_t)vaddr, size)) {
712                         __free_page(src_tpage);
713                         return -EFAULT;
714                 }
715
716                 paddr = __sme_page_pa(src_tpage);
717         }
718
719         /*
720          *  If the destination buffer or length is not 16-byte aligned, do a read-modify-write:
721          *   - decrypt the destination into an intermediate buffer
722          *   - copy the source buffer into the intermediate buffer
723          *   - use the intermediate buffer as the source buffer
724          */
725         if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
726                 int dst_offset;
727
728                 dst_tpage = alloc_page(GFP_KERNEL);
729                 if (!dst_tpage) {
730                         ret = -ENOMEM;
731                         goto e_free;
732                 }
733
734                 ret = __sev_dbg_decrypt(kvm, dst_paddr,
735                                         __sme_page_pa(dst_tpage), size, error);
736                 if (ret)
737                         goto e_free;
738
739                 /*
740                  *  If the source is a kernel buffer then use memcpy();
741                  *  otherwise use copy_from_user().
742                  */
743                 dst_offset = dst_paddr & 15;
744
745                 if (src_tpage)
746                         memcpy(page_address(dst_tpage) + dst_offset,
747                                page_address(src_tpage), size);
748                 else {
749                         if (copy_from_user(page_address(dst_tpage) + dst_offset,
750                                            (void __user *)(uintptr_t)vaddr, size)) {
751                                 ret = -EFAULT;
752                                 goto e_free;
753                         }
754                 }
755
756                 paddr = __sme_page_pa(dst_tpage);
757                 dst_paddr = round_down(dst_paddr, 16);
758                 len = round_up(size, 16);
759         }
760
761         ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
762
763 e_free:
764         if (src_tpage)
765                 __free_page(src_tpage);
766         if (dst_tpage)
767                 __free_page(dst_tpage);
768         return ret;
769 }
770
771 static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
772 {
773         unsigned long vaddr, vaddr_end, next_vaddr;
774         unsigned long dst_vaddr;
775         struct page **src_p, **dst_p;
776         struct kvm_sev_dbg debug;
777         unsigned long n;
778         unsigned int size;
779         int ret;
780
781         if (!sev_guest(kvm))
782                 return -ENOTTY;
783
784         if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
785                 return -EFAULT;
786
787         if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
788                 return -EINVAL;
789         if (!debug.dst_uaddr)
790                 return -EINVAL;
791
792         vaddr = debug.src_uaddr;
793         size = debug.len;
794         vaddr_end = vaddr + size;
795         dst_vaddr = debug.dst_uaddr;
796
797         for (; vaddr < vaddr_end; vaddr = next_vaddr) {
798                 int len, s_off, d_off;
799
800                 /* lock userspace source and destination page */
801                 src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
802                 if (IS_ERR(src_p))
803                         return PTR_ERR(src_p);
804
805                 dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
806                 if (IS_ERR(dst_p)) {
807                         sev_unpin_memory(kvm, src_p, n);
808                         return PTR_ERR(dst_p);
809                 }
810
811                 /*
812                  * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
813                  * the pages; flush the destination too so that future accesses do not
814                  * see stale data.
815                  */
816                 sev_clflush_pages(src_p, 1);
817                 sev_clflush_pages(dst_p, 1);
818
819                 /*
820                  * Since user buffer may not be page aligned, calculate the
821                  * offset within the page.
822                  */
823                 s_off = vaddr & ~PAGE_MASK;
824                 d_off = dst_vaddr & ~PAGE_MASK;
825                 len = min_t(size_t, (PAGE_SIZE - s_off), size);
826
827                 if (dec)
828                         ret = __sev_dbg_decrypt_user(kvm,
829                                                      __sme_page_pa(src_p[0]) + s_off,
830                                                      dst_vaddr,
831                                                      __sme_page_pa(dst_p[0]) + d_off,
832                                                      len, &argp->error);
833                 else
834                         ret = __sev_dbg_encrypt_user(kvm,
835                                                      __sme_page_pa(src_p[0]) + s_off,
836                                                      vaddr,
837                                                      __sme_page_pa(dst_p[0]) + d_off,
838                                                      dst_vaddr,
839                                                      len, &argp->error);
840
841                 sev_unpin_memory(kvm, src_p, n);
842                 sev_unpin_memory(kvm, dst_p, n);
843
844                 if (ret)
845                         goto err;
846
847                 next_vaddr = vaddr + len;
848                 dst_vaddr = dst_vaddr + len;
849                 size -= len;
850         }
851 err:
852         return ret;
853 }
854
855 static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
856 {
857         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
858         struct sev_data_launch_secret *data;
859         struct kvm_sev_launch_secret params;
860         struct page **pages;
861         void *blob, *hdr;
862         unsigned long n, i;
863         int ret, offset;
864
865         if (!sev_guest(kvm))
866                 return -ENOTTY;
867
868         if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
869                 return -EFAULT;
870
871         pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
872         if (IS_ERR(pages))
873                 return PTR_ERR(pages);
874
875         /*
876          * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
877          * place; the cache may contain the data that was written unencrypted.
878          */
879         sev_clflush_pages(pages, n);
880
881         /*
882          * The secret must be copied into a contiguous memory region, so verify
883          * that the userspace memory pages are contiguous before issuing the command.
884          */
885         if (get_num_contig_pages(0, pages, n) != n) {
886                 ret = -EINVAL;
887                 goto e_unpin_memory;
888         }
889
890         ret = -ENOMEM;
891         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
892         if (!data)
893                 goto e_unpin_memory;
894
895         offset = params.guest_uaddr & (PAGE_SIZE - 1);
896         data->guest_address = __sme_page_pa(pages[0]) + offset;
897         data->guest_len = params.guest_len;
898
899         blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
900         if (IS_ERR(blob)) {
901                 ret = PTR_ERR(blob);
902                 goto e_free;
903         }
904
905         data->trans_address = __psp_pa(blob);
906         data->trans_len = params.trans_len;
907
908         hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
909         if (IS_ERR(hdr)) {
910                 ret = PTR_ERR(hdr);
911                 goto e_free_blob;
912         }
913         data->hdr_address = __psp_pa(hdr);
914         data->hdr_len = params.hdr_len;
915
916         data->handle = sev->handle;
917         ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
918
919         kfree(hdr);
920
921 e_free_blob:
922         kfree(blob);
923 e_free:
924         kfree(data);
925 e_unpin_memory:
926         /* content of memory is updated, mark pages dirty */
927         for (i = 0; i < n; i++) {
928                 set_page_dirty_lock(pages[i]);
929                 mark_page_accessed(pages[i]);
930         }
931         sev_unpin_memory(kvm, pages, n);
932         return ret;
933 }
934
935 int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
936 {
937         struct kvm_sev_cmd sev_cmd;
938         int r;
939
940         if (!svm_sev_enabled() || !sev)
941                 return -ENOTTY;
942
943         if (!argp)
944                 return 0;
945
946         if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
947                 return -EFAULT;
948
949         mutex_lock(&kvm->lock);
950
951         switch (sev_cmd.id) {
952         case KVM_SEV_INIT:
953                 r = sev_guest_init(kvm, &sev_cmd);
954                 break;
955         case KVM_SEV_LAUNCH_START:
956                 r = sev_launch_start(kvm, &sev_cmd);
957                 break;
958         case KVM_SEV_LAUNCH_UPDATE_DATA:
959                 r = sev_launch_update_data(kvm, &sev_cmd);
960                 break;
961         case KVM_SEV_LAUNCH_MEASURE:
962                 r = sev_launch_measure(kvm, &sev_cmd);
963                 break;
964         case KVM_SEV_LAUNCH_FINISH:
965                 r = sev_launch_finish(kvm, &sev_cmd);
966                 break;
967         case KVM_SEV_GUEST_STATUS:
968                 r = sev_guest_status(kvm, &sev_cmd);
969                 break;
970         case KVM_SEV_DBG_DECRYPT:
971                 r = sev_dbg_crypt(kvm, &sev_cmd, true);
972                 break;
973         case KVM_SEV_DBG_ENCRYPT:
974                 r = sev_dbg_crypt(kvm, &sev_cmd, false);
975                 break;
976         case KVM_SEV_LAUNCH_SECRET:
977                 r = sev_launch_secret(kvm, &sev_cmd);
978                 break;
979         default:
980                 r = -EINVAL;
981                 goto out;
982         }
983
984         if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
985                 r = -EFAULT;
986
987 out:
988         mutex_unlock(&kvm->lock);
989         return r;
990 }
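
Every command above reaches this dispatcher through the KVM_MEMORY_ENCRYPT_OP ioctl on the VM file descriptor. A minimal, hedged userspace sketch of the first step (KVM_SEV_INIT); /dev/sev is the standard PSP device node and error handling is reduced to the bare minimum:

/* Sketch only: create the SEV platform context for a VM. */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int sev_vm_init_example(int vm_fd)
{
	struct kvm_sev_cmd cmd = { .id = KVM_SEV_INIT };
	int sev_fd = open("/dev/sev", O_RDWR);

	if (sev_fd < 0)
		return -1;

	cmd.sev_fd = sev_fd;    /* handle to the PSP driver */

	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
	/* on failure, cmd.error carries the SEV firmware status code */
}
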
991
992 int svm_register_enc_region(struct kvm *kvm,
993                             struct kvm_enc_region *range)
994 {
995         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
996         struct enc_region *region;
997         int ret = 0;
998
999         if (!sev_guest(kvm))
1000                 return -ENOTTY;
1001
1002         if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
1003                 return -EINVAL;
1004
1005         region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
1006         if (!region)
1007                 return -ENOMEM;
1008
1009         region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
1010         if (IS_ERR(region->pages)) {
1011                 ret = PTR_ERR(region->pages);
1012                 goto e_free;
1013         }
1014
1015         /*
1016          * The guest may change the memory encryption attribute from C=0 -> C=1
1017          * or vice versa for this memory range. Make sure caches are
1018          * flushed so that guest data gets written into memory with the
1019          * correct C-bit.
1020          */
1021         sev_clflush_pages(region->pages, region->npages);
1022
1023         region->uaddr = range->addr;
1024         region->size = range->size;
1025
1026         mutex_lock(&kvm->lock);
1027         list_add_tail(&region->list, &sev->regions_list);
1028         mutex_unlock(&kvm->lock);
1029
1030         return ret;
1031
1032 e_free:
1033         kfree(region);
1034         return ret;
1035 }
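
Userspace pairs this with the KVM_MEMORY_ENCRYPT_REG_REGION ioctl so the memory backing guest RAM stays pinned for the region's lifetime. A hedged sketch (same headers as the sketches above); host_va and ram_size stand in for the VMM's actual RAM mapping:

/* Sketch only: register (and pin) the memory backing guest RAM. */
static int register_ram(int vm_fd, void *host_va, unsigned long ram_size)
{
	struct kvm_enc_region region = {
		.addr = (unsigned long)host_va,  /* userspace VA backing guest RAM */
		.size = ram_size,
	};

	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_REG_REGION, &region);
}
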
1036
1037 static struct enc_region *
1038 find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
1039 {
1040         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1041         struct list_head *head = &sev->regions_list;
1042         struct enc_region *i;
1043
1044         list_for_each_entry(i, head, list) {
1045                 if (i->uaddr == range->addr &&
1046                     i->size == range->size)
1047                         return i;
1048         }
1049
1050         return NULL;
1051 }
1052
1053 static void __unregister_enc_region_locked(struct kvm *kvm,
1054                                            struct enc_region *region)
1055 {
1056         sev_unpin_memory(kvm, region->pages, region->npages);
1057         list_del(&region->list);
1058         kfree(region);
1059 }
1060
1061 int svm_unregister_enc_region(struct kvm *kvm,
1062                               struct kvm_enc_region *range)
1063 {
1064         struct enc_region *region;
1065         int ret;
1066
1067         mutex_lock(&kvm->lock);
1068
1069         if (!sev_guest(kvm)) {
1070                 ret = -ENOTTY;
1071                 goto failed;
1072         }
1073
1074         region = find_enc_region(kvm, range);
1075         if (!region) {
1076                 ret = -EINVAL;
1077                 goto failed;
1078         }
1079
1080         /*
1081          * Ensure that all guest tagged cache entries are flushed before
1082          * releasing the pages back to the system for use. CLFLUSH will
1083          * not do this, so issue a WBINVD.
1084          */
1085         wbinvd_on_all_cpus();
1086
1087         __unregister_enc_region_locked(kvm, region);
1088
1089         mutex_unlock(&kvm->lock);
1090         return 0;
1091
1092 failed:
1093         mutex_unlock(&kvm->lock);
1094         return ret;
1095 }
1096
1097 void sev_vm_destroy(struct kvm *kvm)
1098 {
1099         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1100         struct list_head *head = &sev->regions_list;
1101         struct list_head *pos, *q;
1102
1103         if (!sev_guest(kvm))
1104                 return;
1105
1106         mutex_lock(&kvm->lock);
1107
1108         /*
1109          * Ensure that all guest tagged cache entries are flushed before
1110          * releasing the pages back to the system for use. CLFLUSH will
1111          * not do this, so issue a WBINVD.
1112          */
1113         wbinvd_on_all_cpus();
1114
1115         /*
1116          * If userspace was terminated before unregistering the memory regions,
1117          * unpin all the registered memory.
1118          */
1119         if (!list_empty(head)) {
1120                 list_for_each_safe(pos, q, head) {
1121                         __unregister_enc_region_locked(kvm,
1122                                 list_entry(pos, struct enc_region, list));
1123                         cond_resched();
1124                 }
1125         }
1126
1127         mutex_unlock(&kvm->lock);
1128
1129         sev_unbind_asid(kvm, sev->handle);
1130         sev_asid_free(sev->asid);
1131 }
1132
1133 void __init sev_hardware_setup(void)
1134 {
1135         unsigned int eax, ebx, ecx, edx;
1136         bool sev_es_supported = false;
1137         bool sev_supported = false;
1138
1139         /* Does the CPU support SEV? */
1140         if (!boot_cpu_has(X86_FEATURE_SEV))
1141                 goto out;
1142
1143         /* Retrieve SEV CPUID information */
1144         cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
1145
1146         /* Set encryption bit location for SEV-ES guests */
1147         sev_enc_bit = ebx & 0x3f;
1148
1149         /* Maximum number of encrypted guests supported simultaneously */
1150         max_sev_asid = ecx;
1151
1152         if (!svm_sev_enabled())
1153                 goto out;
1154
1155         /* Minimum ASID value that should be used for SEV guest */
1156         min_sev_asid = edx;
1157
1158         /* Initialize SEV ASID bitmaps */
1159         sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1160         if (!sev_asid_bitmap)
1161                 goto out;
1162
1163         sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1164         if (!sev_reclaim_asid_bitmap)
1165                 goto out;
1166
1167         pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
1168         sev_supported = true;
1169
1170         /* SEV-ES support requested? */
1171         if (!sev_es)
1172                 goto out;
1173
1174         /* Does the CPU support SEV-ES? */
1175         if (!boot_cpu_has(X86_FEATURE_SEV_ES))
1176                 goto out;
1177
1178         /* Has the system been allocated ASIDs for SEV-ES? */
1179         if (min_sev_asid == 1)
1180                 goto out;
1181
1182         pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
1183         sev_es_supported = true;
1184
1185 out:
1186         sev = sev_supported;
1187         sev_es = sev_es_supported;
1188 }
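
For reference, CPUID leaf 0x8000001F reports the C-bit position in EBX[5:0], the number of simultaneously supported encrypted guests (the maximum ASID) in ECX, and the minimum ASID usable by a plain SEV guest in EDX; ASIDs 1 through EDX - 1 are reserved for SEV-ES. A small userspace sketch that prints the same values, for illustration only:

/* Sketch only: read the SEV capability leaf from userspace. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("C-bit position: %u\n", ebx & 0x3f);
	printf("max SEV ASID:   %u\n", ecx);
	printf("min SEV ASID:   %u (SEV-ES ASIDs: 1..%u)\n", edx, edx - 1);
	return 0;
}
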
1189
1190 void sev_hardware_teardown(void)
1191 {
1192         if (!svm_sev_enabled())
1193                 return;
1194
1195         bitmap_free(sev_asid_bitmap);
1196         bitmap_free(sev_reclaim_asid_bitmap);
1197
1198         sev_flush_asids();
1199 }
1200
1201 /*
1202  * Pages used by hardware to hold guest encrypted state must be flushed before
1203  * returning them to the system.
1204  */
1205 static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
1206                                    unsigned long len)
1207 {
1208         /*
1209          * If hardware enforced cache coherency for encrypted mappings of the
1210          * same physical page is supported, nothing to do.
1211          */
1212         if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
1213                 return;
1214
1215         /*
1216          * If the VM Page Flush MSR is supported, use it to flush the page
1217          * (using the page virtual address and the guest ASID).
1218          */
1219         if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
1220                 struct kvm_sev_info *sev;
1221                 unsigned long va_start;
1222                 u64 start, stop;
1223
1224                 /* Align start and stop to page boundaries. */
1225                 va_start = (unsigned long)va;
1226                 start = (u64)va_start & PAGE_MASK;
1227                 stop = PAGE_ALIGN((u64)va_start + len);
1228
1229                 if (start < stop) {
1230                         sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
1231
1232                         while (start < stop) {
1233                                 wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
1234                                        start | sev->asid);
1235
1236                                 start += PAGE_SIZE;
1237                         }
1238
1239                         return;
1240                 }
1241
1242                 WARN(1, "Address overflow, using WBINVD\n");
1243         }
1244
1245         /*
1246          * Hardware should always have one of the above features,
1247          * but if not, use WBINVD and issue a warning.
1248          */
1249         WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
1250         wbinvd_on_all_cpus();
1251 }
1252
1253 void sev_free_vcpu(struct kvm_vcpu *vcpu)
1254 {
1255         struct vcpu_svm *svm;
1256
1257         if (!sev_es_guest(vcpu->kvm))
1258                 return;
1259
1260         svm = to_svm(vcpu);
1261
1262         if (vcpu->arch.guest_state_protected)
1263                 sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
1264         __free_page(virt_to_page(svm->vmsa));
1265
1266         if (svm->ghcb_sa_free)
1267                 kfree(svm->ghcb_sa);
1268 }
1269
1270 static void dump_ghcb(struct vcpu_svm *svm)
1271 {
1272         struct ghcb *ghcb = svm->ghcb;
1273         unsigned int nbits;
1274
1275         /* Re-use the dump_invalid_vmcb module parameter */
1276         if (!dump_invalid_vmcb) {
1277                 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
1278                 return;
1279         }
1280
1281         nbits = sizeof(ghcb->save.valid_bitmap) * 8;
1282
1283         pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
1284         pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
1285                ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
1286         pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
1287                ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
1288         pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
1289                ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
1290         pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
1291                ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
1292         pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
1293 }
1294
1295 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
1296 {
1297         struct kvm_vcpu *vcpu = &svm->vcpu;
1298         struct ghcb *ghcb = svm->ghcb;
1299
1300         /*
1301          * The GHCB protocol so far allows for the following data
1302          * to be returned:
1303          *   GPRs RAX, RBX, RCX, RDX
1304          *
1305          * Copy their values to the GHCB if they are dirty.
1306          */
1307         if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
1308                 ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
1309         if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
1310                 ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
1311         if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX))
1312                 ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
1313         if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX))
1314                 ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
1315 }
1316
1317 static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
1318 {
1319         struct vmcb_control_area *control = &svm->vmcb->control;
1320         struct kvm_vcpu *vcpu = &svm->vcpu;
1321         struct ghcb *ghcb = svm->ghcb;
1322         u64 exit_code;
1323
1324         /*
1325          * The GHCB protocol so far allows for the following data
1326          * to be supplied:
1327          *   GPRs RAX, RBX, RCX, RDX
1328          *   XCR0
1329          *   CPL
1330          *
1331          * VMMCALL allows the guest to provide extra registers. KVM also
1332          * expects RSI for hypercalls, so include that, too.
1333          *
1334          * Copy their values to the appropriate location if supplied.
1335          */
1336         memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
1337
1338         vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
1339         vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
1340         vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
1341         vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
1342         vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
1343
1344         svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
1345
1346         if (ghcb_xcr0_is_valid(ghcb)) {
1347                 vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
1348                 kvm_update_cpuid_runtime(vcpu);
1349         }
1350
1351         /* Copy the GHCB exit information into the VMCB fields */
1352         exit_code = ghcb_get_sw_exit_code(ghcb);
1353         control->exit_code = lower_32_bits(exit_code);
1354         control->exit_code_hi = upper_32_bits(exit_code);
1355         control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
1356         control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
1357
1358         /* Clear the valid entries fields */
1359         memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
1360 }
1361
1362 static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
1363 {
1364         struct kvm_vcpu *vcpu;
1365         struct ghcb *ghcb;
1366         u64 exit_code = 0;
1367
1368         ghcb = svm->ghcb;
1369
1370         /* Only GHCB Usage code 0 is supported */
1371         if (ghcb->ghcb_usage)
1372                 goto vmgexit_err;
1373
1374         /*
1375          * Retrieve the exit code now even though it may not be marked valid,
1376          * as it could help with debugging.
1377          */
1378         exit_code = ghcb_get_sw_exit_code(ghcb);
1379
1380         if (!ghcb_sw_exit_code_is_valid(ghcb) ||
1381             !ghcb_sw_exit_info_1_is_valid(ghcb) ||
1382             !ghcb_sw_exit_info_2_is_valid(ghcb))
1383                 goto vmgexit_err;
1384
1385         switch (ghcb_get_sw_exit_code(ghcb)) {
1386         case SVM_EXIT_READ_DR7:
1387                 break;
1388         case SVM_EXIT_WRITE_DR7:
1389                 if (!ghcb_rax_is_valid(ghcb))
1390                         goto vmgexit_err;
1391                 break;
1392         case SVM_EXIT_RDTSC:
1393                 break;
1394         case SVM_EXIT_RDPMC:
1395                 if (!ghcb_rcx_is_valid(ghcb))
1396                         goto vmgexit_err;
1397                 break;
1398         case SVM_EXIT_CPUID:
1399                 if (!ghcb_rax_is_valid(ghcb) ||
1400                     !ghcb_rcx_is_valid(ghcb))
1401                         goto vmgexit_err;
1402                 if (ghcb_get_rax(ghcb) == 0xd)
1403                         if (!ghcb_xcr0_is_valid(ghcb))
1404                                 goto vmgexit_err;
1405                 break;
1406         case SVM_EXIT_INVD:
1407                 break;
1408         case SVM_EXIT_IOIO:
1409                 if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
1410                         if (!ghcb_sw_scratch_is_valid(ghcb))
1411                                 goto vmgexit_err;
1412                 } else {
1413                         if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
1414                                 if (!ghcb_rax_is_valid(ghcb))
1415                                         goto vmgexit_err;
1416                 }
1417                 break;
1418         case SVM_EXIT_MSR:
1419                 if (!ghcb_rcx_is_valid(ghcb))
1420                         goto vmgexit_err;
1421                 if (ghcb_get_sw_exit_info_1(ghcb)) {
1422                         if (!ghcb_rax_is_valid(ghcb) ||
1423                             !ghcb_rdx_is_valid(ghcb))
1424                                 goto vmgexit_err;
1425                 }
1426                 break;
1427         case SVM_EXIT_VMMCALL:
1428                 if (!ghcb_rax_is_valid(ghcb) ||
1429                     !ghcb_cpl_is_valid(ghcb))
1430                         goto vmgexit_err;
1431                 break;
1432         case SVM_EXIT_RDTSCP:
1433                 break;
1434         case SVM_EXIT_WBINVD:
1435                 break;
1436         case SVM_EXIT_MONITOR:
1437                 if (!ghcb_rax_is_valid(ghcb) ||
1438                     !ghcb_rcx_is_valid(ghcb) ||
1439                     !ghcb_rdx_is_valid(ghcb))
1440                         goto vmgexit_err;
1441                 break;
1442         case SVM_EXIT_MWAIT:
1443                 if (!ghcb_rax_is_valid(ghcb) ||
1444                     !ghcb_rcx_is_valid(ghcb))
1445                         goto vmgexit_err;
1446                 break;
1447         case SVM_VMGEXIT_MMIO_READ:
1448         case SVM_VMGEXIT_MMIO_WRITE:
1449                 if (!ghcb_sw_scratch_is_valid(ghcb))
1450                         goto vmgexit_err;
1451                 break;
1452         case SVM_VMGEXIT_NMI_COMPLETE:
1453         case SVM_VMGEXIT_UNSUPPORTED_EVENT:
1454                 break;
1455         default:
1456                 goto vmgexit_err;
1457         }
1458
1459         return 0;
1460
1461 vmgexit_err:
1462         vcpu = &svm->vcpu;
1463
1464         if (ghcb->ghcb_usage) {
1465                 vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
1466                             ghcb->ghcb_usage);
1467         } else {
1468                 vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
1469                             exit_code);
1470                 dump_ghcb(svm);
1471         }
1472
1473         vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1474         vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
1475         vcpu->run->internal.ndata = 2;
1476         vcpu->run->internal.data[0] = exit_code;
1477         vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
1478
1479         return -EINVAL;
1480 }
1481
1482 static void pre_sev_es_run(struct vcpu_svm *svm)
1483 {
1484         if (!svm->ghcb)
1485                 return;
1486
1487         if (svm->ghcb_sa_free) {
1488                 /*
1489                  * The scratch area lives outside the GHCB, so there is a
1490                  * buffer that, depending on the operation performed, may
1491                  * need to be synced, then freed.
1492                  */
1493                 if (svm->ghcb_sa_sync) {
1494                         kvm_write_guest(svm->vcpu.kvm,
1495                                         ghcb_get_sw_scratch(svm->ghcb),
1496                                         svm->ghcb_sa, svm->ghcb_sa_len);
1497                         svm->ghcb_sa_sync = false;
1498                 }
1499
1500                 kfree(svm->ghcb_sa);
1501                 svm->ghcb_sa = NULL;
1502                 svm->ghcb_sa_free = false;
1503         }
1504
1505         trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
1506
1507         sev_es_sync_to_ghcb(svm);
1508
1509         kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
1510         svm->ghcb = NULL;
1511 }
1512
1513 void pre_sev_run(struct vcpu_svm *svm, int cpu)
1514 {
1515         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
1516         int asid = sev_get_asid(svm->vcpu.kvm);
1517
1518         /* Perform any SEV-ES pre-run actions */
1519         pre_sev_es_run(svm);
1520
1521         /* Assign the asid allocated with this SEV guest */
1522         svm->asid = asid;
1523
1524         /*
1525          * Flush guest TLB:
1526          *
1527          * 1) when a different VMCB for the same ASID is to be run on the same host CPU, or
1528          * 2) when this VMCB was executed on a different host CPU in previous VMRUNs.
1529          */
1530         if (sd->sev_vmcbs[asid] == svm->vmcb &&
1531             svm->vcpu.arch.last_vmentry_cpu == cpu)
1532                 return;
1533
1534         sd->sev_vmcbs[asid] = svm->vmcb;
1535         svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
1536         vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
1537 }
1538
1539 #define GHCB_SCRATCH_AREA_LIMIT         (16ULL * PAGE_SIZE)
1540 static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
1541 {
1542         struct vmcb_control_area *control = &svm->vmcb->control;
1543         struct ghcb *ghcb = svm->ghcb;
1544         u64 ghcb_scratch_beg, ghcb_scratch_end;
1545         u64 scratch_gpa_beg, scratch_gpa_end;
1546         void *scratch_va;
1547
1548         scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
1549         if (!scratch_gpa_beg) {
1550                 pr_err("vmgexit: scratch gpa not provided\n");
1551                 return false;
1552         }
1553
1554         scratch_gpa_end = scratch_gpa_beg + len;
1555         if (scratch_gpa_end < scratch_gpa_beg) {
1556                 pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
1557                        len, scratch_gpa_beg);
1558                 return false;
1559         }
1560
1561         if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
1562                 /* Scratch area begins within GHCB */
1563                 ghcb_scratch_beg = control->ghcb_gpa +
1564                                    offsetof(struct ghcb, shared_buffer);
1565                 ghcb_scratch_end = control->ghcb_gpa +
1566                                    offsetof(struct ghcb, reserved_1);
1567
1568                 /*
1569                  * If the scratch area begins within the GHCB, it must be
1570                  * completely contained in the GHCB shared buffer area.
1571                  */
1572                 if (scratch_gpa_beg < ghcb_scratch_beg ||
1573                     scratch_gpa_end > ghcb_scratch_end) {
1574                         pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
1575                                scratch_gpa_beg, scratch_gpa_end);
1576                         return false;
1577                 }
1578
1579                 scratch_va = (void *)svm->ghcb;
1580                 scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
1581         } else {
1582                 /*
1583                  * The guest memory must be read into a kernel buffer, so
1584                  * limit the size.
1585                  */
1586                 if (len > GHCB_SCRATCH_AREA_LIMIT) {
1587                         pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
1588                                len, GHCB_SCRATCH_AREA_LIMIT);
1589                         return false;
1590                 }
1591                 scratch_va = kzalloc(len, GFP_KERNEL);
1592                 if (!scratch_va)
1593                         return false;
1594
1595                 if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
1596                         /* Unable to copy scratch area from guest */
1597                         pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
1598
1599                         kfree(scratch_va);
1600                         return false;
1601                 }
1602
1603                 /*
1604                  * The scratch area is outside the GHCB. The operation will
1605                  * dictate whether the buffer needs to be synced before running
1606          * the vCPU next time (i.e. a read was requested, so the data
1607          * must be written back to the guest memory).
1608                  */
1609                 svm->ghcb_sa_sync = sync;
1610                 svm->ghcb_sa_free = true;
1611         }
1612
1613         svm->ghcb_sa = scratch_va;
1614         svm->ghcb_sa_len = len;
1615
1616         return true;
1617 }
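/*
 * Illustrative caller pattern for the scratch helper above (a sketch only,
 * not part of this file; handle_some_vmgexit() and consume_guest_data() are
 * made-up names).  The real users are the MMIO and string I/O paths below,
 * which follow the same shape:
 *
 *	static int handle_some_vmgexit(struct vcpu_svm *svm, u64 len)
 *	{
 *		if (!setup_vmgexit_scratch(svm, false, len))
 *			return -EINVAL;
 *
 *		// svm->ghcb_sa now points at len bytes of guest-provided data
 *		return consume_guest_data(svm->ghcb_sa, svm->ghcb_sa_len);
 *	}
 */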
1618
1619 static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
1620                               unsigned int pos)
1621 {
1622         svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
1623         svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
1624 }
1625
1626 static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
1627 {
1628         return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
1629 }
1630
1631 static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
1632 {
1633         svm->vmcb->control.ghcb_gpa = value;
1634 }
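/*
 * The GHCB MSR protocol multiplexes requests and responses through the GHCB
 * MSR value itself: the bits selected by GHCB_MSR_INFO_MASK at
 * GHCB_MSR_INFO_POS carry the request/response code, while the remaining
 * bits carry per-request data.  The helpers above read and update individual
 * fields of that value without disturbing the rest.
 */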
1635
1636 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
1637 {
1638         struct vmcb_control_area *control = &svm->vmcb->control;
1639         struct kvm_vcpu *vcpu = &svm->vcpu;
1640         u64 ghcb_info;
1641         int ret = 1;
1642
1643         ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
1644
1645         trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
1646                                              control->ghcb_gpa);
1647
1648         switch (ghcb_info) {
1649         case GHCB_MSR_SEV_INFO_REQ:
1650                 set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
1651                                                     GHCB_VERSION_MIN,
1652                                                     sev_enc_bit));
1653                 break;
1654         case GHCB_MSR_CPUID_REQ: {
1655                 u64 cpuid_fn, cpuid_reg, cpuid_value;
1656
1657                 cpuid_fn = get_ghcb_msr_bits(svm,
1658                                              GHCB_MSR_CPUID_FUNC_MASK,
1659                                              GHCB_MSR_CPUID_FUNC_POS);
1660
1661                 /* Initialize the registers needed by the CPUID intercept */
1662                 vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
1663                 vcpu->arch.regs[VCPU_REGS_RCX] = 0;
1664
1665                 ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
1666                 if (!ret) {
1667                         ret = -EINVAL;
1668                         break;
1669                 }
1670
1671                 cpuid_reg = get_ghcb_msr_bits(svm,
1672                                               GHCB_MSR_CPUID_REG_MASK,
1673                                               GHCB_MSR_CPUID_REG_POS);
1674                 if (cpuid_reg == 0)
1675                         cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
1676                 else if (cpuid_reg == 1)
1677                         cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
1678                 else if (cpuid_reg == 2)
1679                         cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
1680                 else
1681                         cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];
1682
1683                 set_ghcb_msr_bits(svm, cpuid_value,
1684                                   GHCB_MSR_CPUID_VALUE_MASK,
1685                                   GHCB_MSR_CPUID_VALUE_POS);
1686
1687                 set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
1688                                   GHCB_MSR_INFO_MASK,
1689                                   GHCB_MSR_INFO_POS);
1690                 break;
1691         }
1692         case GHCB_MSR_TERM_REQ: {
1693                 u64 reason_set, reason_code;
1694
1695                 reason_set = get_ghcb_msr_bits(svm,
1696                                                GHCB_MSR_TERM_REASON_SET_MASK,
1697                                                GHCB_MSR_TERM_REASON_SET_POS);
1698                 reason_code = get_ghcb_msr_bits(svm,
1699                                                 GHCB_MSR_TERM_REASON_MASK,
1700                                                 GHCB_MSR_TERM_REASON_POS);
1701                 pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
1702                         reason_set, reason_code);
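                /* Deliberately fall through to fail the request with -EINVAL. */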
1703                 fallthrough;
1704         }
1705         default:
1706                 ret = -EINVAL;
1707         }
1708
1709         trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
1710                                             control->ghcb_gpa, ret);
1711
1712         return ret;
1713 }
1714
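/*
 * Main VMGEXIT handler.  If the GHCB MSR value carries an MSR protocol
 * request, it is handled without mapping a GHCB page; otherwise the GHCB is
 * mapped from guest memory, validated, and the guest-supplied register state
 * is synced in before dispatching on the GHCB exit code.  Exit codes with no
 * direct hardware equivalent (MMIO, NMI completion, unsupported events) are
 * handled here; everything else is forwarded to the normal SVM exit handlers.
 */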
1715 int sev_handle_vmgexit(struct vcpu_svm *svm)
1716 {
1717         struct vmcb_control_area *control = &svm->vmcb->control;
1718         u64 ghcb_gpa, exit_code;
1719         struct ghcb *ghcb;
1720         int ret;
1721
1722         /* Validate the GHCB */
1723         ghcb_gpa = control->ghcb_gpa;
1724         if (ghcb_gpa & GHCB_MSR_INFO_MASK)
1725                 return sev_handle_vmgexit_msr_protocol(svm);
1726
1727         if (!ghcb_gpa) {
1728                 vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
1729                 return -EINVAL;
1730         }
1731
1732         if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
1733                 /* Unable to map GHCB from guest */
1734                 vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
1735                             ghcb_gpa);
1736                 return -EINVAL;
1737         }
1738
1739         svm->ghcb = svm->ghcb_map.hva;
1740         ghcb = svm->ghcb_map.hva;
1741
1742         trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);
1743
1744         exit_code = ghcb_get_sw_exit_code(ghcb);
1745
1746         ret = sev_es_validate_vmgexit(svm);
1747         if (ret)
1748                 return ret;
1749
1750         sev_es_sync_from_ghcb(svm);
1751         ghcb_set_sw_exit_info_1(ghcb, 0);
1752         ghcb_set_sw_exit_info_2(ghcb, 0);
1753
1754         ret = -EINVAL;
1755         switch (exit_code) {
1756         case SVM_VMGEXIT_MMIO_READ:
1757                 if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
1758                         break;
1759
1760                 ret = kvm_sev_es_mmio_read(&svm->vcpu,
1761                                            control->exit_info_1,
1762                                            control->exit_info_2,
1763                                            svm->ghcb_sa);
1764                 break;
1765         case SVM_VMGEXIT_MMIO_WRITE:
1766                 if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
1767                         break;
1768
1769                 ret = kvm_sev_es_mmio_write(&svm->vcpu,
1770                                             control->exit_info_1,
1771                                             control->exit_info_2,
1772                                             svm->ghcb_sa);
1773                 break;
1774         case SVM_VMGEXIT_NMI_COMPLETE:
1775                 ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
1776                 break;
1777         case SVM_VMGEXIT_UNSUPPORTED_EVENT:
1778                 vcpu_unimpl(&svm->vcpu,
1779                             "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
1780                             control->exit_info_1, control->exit_info_2);
1781                 break;
1782         default:
1783                 ret = svm_invoke_exit_handler(svm, exit_code);
1784         }
1785
1786         return ret;
1787 }
1788
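/*
 * For an SEV-ES guest the emulator cannot access encrypted guest memory
 * directly, so string I/O data is exchanged through the GHCB scratch buffer;
 * @in selects whether the buffer must be synced back to the guest once the
 * I/O completes.
 */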
1789 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
1790 {
1791         if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
1792                 return -EINVAL;
1793
1794         return kvm_sev_es_string_io(&svm->vcpu, size, port,
1795                                     svm->ghcb_sa, svm->ghcb_sa_len, in);
1796 }