drivers/vdpa/vdpa_user/vduse_dev.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VDUSE: vDPA Device in Userspace
4  *
5  * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6  *
7  * Author: Xie Yongji <xieyongji@bytedance.com>
8  *
9  */
10
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/cdev.h>
14 #include <linux/device.h>
15 #include <linux/eventfd.h>
16 #include <linux/slab.h>
17 #include <linux/wait.h>
18 #include <linux/dma-map-ops.h>
19 #include <linux/poll.h>
20 #include <linux/file.h>
21 #include <linux/uio.h>
22 #include <linux/vdpa.h>
23 #include <linux/nospec.h>
24 #include <linux/vmalloc.h>
25 #include <linux/sched/mm.h>
26 #include <uapi/linux/vduse.h>
27 #include <uapi/linux/vdpa.h>
28 #include <uapi/linux/virtio_config.h>
29 #include <uapi/linux/virtio_ids.h>
30 #include <uapi/linux/virtio_blk.h>
31 #include <linux/mod_devicetable.h>
32
33 #include "iova_domain.h"
34
35 #define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
36 #define DRV_DESC     "vDPA Device in Userspace"
37 #define DRV_LICENSE  "GPL v2"
38
39 #define VDUSE_DEV_MAX (1U << MINORBITS)
40 #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
41 #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
42 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
43 /* 128 MB reserved for virtqueue creation */
44 #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
45 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
46
47 #define IRQ_UNBOUND -1
48
49 struct vduse_virtqueue {
50         u16 index;
51         u16 num_max;
52         u32 num;
53         u64 desc_addr;
54         u64 driver_addr;
55         u64 device_addr;
56         struct vdpa_vq_state state;
57         bool ready;
58         bool kicked;
59         spinlock_t kick_lock;
60         spinlock_t irq_lock;
61         struct eventfd_ctx *kickfd;
62         struct vdpa_callback cb;
63         struct work_struct inject;
64         struct work_struct kick;
65         int irq_effective_cpu;
66         struct cpumask irq_affinity;
67         struct kobject kobj;
68 };
69
70 struct vduse_dev;
71
72 struct vduse_vdpa {
73         struct vdpa_device vdpa;
74         struct vduse_dev *dev;
75 };
76
77 struct vduse_umem {
78         unsigned long iova;
79         unsigned long npages;
80         struct page **pages;
81         struct mm_struct *mm;
82 };
83
84 struct vduse_dev {
85         struct vduse_vdpa *vdev;
86         struct device *dev;
87         struct vduse_virtqueue **vqs;
88         struct vduse_iova_domain *domain;
89         char *name;
90         struct mutex lock;
91         spinlock_t msg_lock;
92         u64 msg_unique;
93         u32 msg_timeout;
94         wait_queue_head_t waitq;
95         struct list_head send_list;
96         struct list_head recv_list;
97         struct vdpa_callback config_cb;
98         struct work_struct inject;
99         spinlock_t irq_lock;
100         struct rw_semaphore rwsem;
101         int minor;
102         bool broken;
103         bool connected;
104         u64 api_version;
105         u64 device_features;
106         u64 driver_features;
107         u32 device_id;
108         u32 vendor_id;
109         u32 generation;
110         u32 config_size;
111         void *config;
112         u8 status;
113         u32 vq_num;
114         u32 vq_align;
115         struct vduse_umem *umem;
116         struct mutex mem_lock;
117         unsigned int bounce_size;
118         struct mutex domain_lock;
119 };
120
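/*
 * A single request/response exchange with the userspace daemon. The
 * message sits on dev->send_list until userspace reads it, then on
 * dev->recv_list until a response carrying the same request_id is
 * written back, at which point "completed" is set and waitq is woken.
 */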
121 struct vduse_dev_msg {
122         struct vduse_dev_request req;
123         struct vduse_dev_response resp;
124         struct list_head list;
125         wait_queue_head_t waitq;
126         bool completed;
127 };
128
129 struct vduse_control {
130         u64 api_version;
131 };
132
133 static DEFINE_MUTEX(vduse_lock);
134 static DEFINE_IDR(vduse_idr);
135
136 static dev_t vduse_major;
137 static struct cdev vduse_ctrl_cdev;
138 static struct cdev vduse_cdev;
139 static struct workqueue_struct *vduse_irq_wq;
140 static struct workqueue_struct *vduse_irq_bound_wq;
141
142 static u32 allowed_device_id[] = {
143         VIRTIO_ID_BLOCK,
144 };
145
146 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
147 {
148         struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
149
150         return vdev->dev;
151 }
152
153 static inline struct vduse_dev *dev_to_vduse(struct device *dev)
154 {
155         struct vdpa_device *vdpa = dev_to_vdpa(dev);
156
157         return vdpa_to_vduse(vdpa);
158 }
159
160 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
161                                             uint32_t request_id)
162 {
163         struct vduse_dev_msg *msg;
164
165         list_for_each_entry(msg, head, list) {
166                 if (msg->req.request_id == request_id) {
167                         list_del(&msg->list);
168                         return msg;
169                 }
170         }
171
172         return NULL;
173 }
174
175 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
176 {
177         struct vduse_dev_msg *msg = NULL;
178
179         if (!list_empty(head)) {
180                 msg = list_first_entry(head, struct vduse_dev_msg, list);
181                 list_del(&msg->list);
182         }
183
184         return msg;
185 }
186
187 static void vduse_enqueue_msg(struct list_head *head,
188                               struct vduse_dev_msg *msg)
189 {
190         list_add_tail(&msg->list, head);
191 }
192
193 static void vduse_dev_broken(struct vduse_dev *dev)
194 {
195         struct vduse_dev_msg *msg, *tmp;
196
197         if (unlikely(dev->broken))
198                 return;
199
200         list_splice_init(&dev->recv_list, &dev->send_list);
201         list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
202                 list_del(&msg->list);
203                 msg->completed = 1;
204                 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
205                 wake_up(&msg->waitq);
206         }
207         dev->broken = true;
208         wake_up(&dev->waitq);
209 }
210
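/*
 * Send a request to userspace and wait for its response. The caller's
 * msg is queued on dev->send_list and readers of the char device are
 * woken; completion is awaited for up to msg_timeout seconds (or
 * indefinitely if msg_timeout is 0). On timeout the device is marked
 * broken and all pending messages are failed.
 */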
211 static int vduse_dev_msg_sync(struct vduse_dev *dev,
212                               struct vduse_dev_msg *msg)
213 {
214         int ret;
215
216         if (unlikely(dev->broken))
217                 return -EIO;
218
219         init_waitqueue_head(&msg->waitq);
220         spin_lock(&dev->msg_lock);
221         if (unlikely(dev->broken)) {
222                 spin_unlock(&dev->msg_lock);
223                 return -EIO;
224         }
225         msg->req.request_id = dev->msg_unique++;
226         vduse_enqueue_msg(&dev->send_list, msg);
227         wake_up(&dev->waitq);
228         spin_unlock(&dev->msg_lock);
229         if (dev->msg_timeout)
230                 ret = wait_event_killable_timeout(msg->waitq, msg->completed,
231                                                   (long)dev->msg_timeout * HZ);
232         else
233                 ret = wait_event_killable(msg->waitq, msg->completed);
234
235         spin_lock(&dev->msg_lock);
236         if (!msg->completed) {
237                 list_del(&msg->list);
238                 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
239                 /* Mark the device as broken when the wait times out */
240                 if (!ret)
241                         vduse_dev_broken(dev);
242         }
243         ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
244         spin_unlock(&dev->msg_lock);
245
246         return ret;
247 }
248
249 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
250                                          struct vduse_virtqueue *vq,
251                                          struct vdpa_vq_state_packed *packed)
252 {
253         struct vduse_dev_msg msg = { 0 };
254         int ret;
255
256         msg.req.type = VDUSE_GET_VQ_STATE;
257         msg.req.vq_state.index = vq->index;
258
259         ret = vduse_dev_msg_sync(dev, &msg);
260         if (ret)
261                 return ret;
262
263         packed->last_avail_counter =
264                         msg.resp.vq_state.packed.last_avail_counter & 0x0001;
265         packed->last_avail_idx =
266                         msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
267         packed->last_used_counter =
268                         msg.resp.vq_state.packed.last_used_counter & 0x0001;
269         packed->last_used_idx =
270                         msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
271
272         return 0;
273 }
274
275 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
276                                         struct vduse_virtqueue *vq,
277                                         struct vdpa_vq_state_split *split)
278 {
279         struct vduse_dev_msg msg = { 0 };
280         int ret;
281
282         msg.req.type = VDUSE_GET_VQ_STATE;
283         msg.req.vq_state.index = vq->index;
284
285         ret = vduse_dev_msg_sync(dev, &msg);
286         if (ret)
287                 return ret;
288
289         split->avail_index = msg.resp.vq_state.split.avail_index;
290
291         return 0;
292 }
293
294 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
295 {
296         struct vduse_dev_msg msg = { 0 };
297
298         msg.req.type = VDUSE_SET_STATUS;
299         msg.req.s.status = status;
300
301         return vduse_dev_msg_sync(dev, &msg);
302 }
303
304 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
305                                   u64 start, u64 last)
306 {
307         struct vduse_dev_msg msg = { 0 };
308
309         if (last < start)
310                 return -EINVAL;
311
312         msg.req.type = VDUSE_UPDATE_IOTLB;
313         msg.req.iova.start = start;
314         msg.req.iova.last = last;
315
316         return vduse_dev_msg_sync(dev, &msg);
317 }
318
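/*
 * read() side of the userspace message channel: hand out the next
 * pending struct vduse_dev_request from dev->send_list (blocking
 * unless O_NONBLOCK) and park it on dev->recv_list until the reply
 * arrives via vduse_dev_write_iter().
 *
 * Illustrative userspace loop (sketch only, error handling omitted;
 * dev_fd is the open /dev/vduse/<name> descriptor):
 *
 *	struct vduse_dev_request req;
 *	struct vduse_dev_response resp = { 0 };
 *
 *	read(dev_fd, &req, sizeof(req));
 *	resp.request_id = req.request_id;
 *	resp.result = VDUSE_REQ_RESULT_OK;
 *	write(dev_fd, &resp, sizeof(resp));
 */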
319 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
320 {
321         struct file *file = iocb->ki_filp;
322         struct vduse_dev *dev = file->private_data;
323         struct vduse_dev_msg *msg;
324         int size = sizeof(struct vduse_dev_request);
325         ssize_t ret;
326
327         if (iov_iter_count(to) < size)
328                 return -EINVAL;
329
330         spin_lock(&dev->msg_lock);
331         while (1) {
332                 msg = vduse_dequeue_msg(&dev->send_list);
333                 if (msg)
334                         break;
335
336                 ret = -EAGAIN;
337                 if (file->f_flags & O_NONBLOCK)
338                         goto unlock;
339
340                 spin_unlock(&dev->msg_lock);
341                 ret = wait_event_interruptible_exclusive(dev->waitq,
342                                         !list_empty(&dev->send_list));
343                 if (ret)
344                         return ret;
345
346                 spin_lock(&dev->msg_lock);
347         }
348         spin_unlock(&dev->msg_lock);
349         ret = copy_to_iter(&msg->req, size, to);
350         spin_lock(&dev->msg_lock);
351         if (ret != size) {
352                 ret = -EFAULT;
353                 vduse_enqueue_msg(&dev->send_list, msg);
354                 goto unlock;
355         }
356         vduse_enqueue_msg(&dev->recv_list, msg);
357 unlock:
358         spin_unlock(&dev->msg_lock);
359
360         return ret;
361 }
362
363 static bool is_mem_zero(const char *ptr, int size)
364 {
365         int i;
366
367         for (i = 0; i < size; i++) {
368                 if (ptr[i])
369                         return false;
370         }
371         return true;
372 }
373
374 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
375 {
376         struct file *file = iocb->ki_filp;
377         struct vduse_dev *dev = file->private_data;
378         struct vduse_dev_response resp;
379         struct vduse_dev_msg *msg;
380         size_t ret;
381
382         ret = copy_from_iter(&resp, sizeof(resp), from);
383         if (ret != sizeof(resp))
384                 return -EINVAL;
385
386         if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
387                 return -EINVAL;
388
389         spin_lock(&dev->msg_lock);
390         msg = vduse_find_msg(&dev->recv_list, resp.request_id);
391         if (!msg) {
392                 ret = -ENOENT;
393                 goto unlock;
394         }
395
396         memcpy(&msg->resp, &resp, sizeof(resp));
397         msg->completed = 1;
398         wake_up(&msg->waitq);
399 unlock:
400         spin_unlock(&dev->msg_lock);
401
402         return ret;
403 }
404
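/*
 * poll() semantics for the message channel: EPOLLIN means a request is
 * waiting to be read, EPOLLOUT means a previously read request is
 * still waiting for its response, and EPOLLERR means the device has
 * been marked broken.
 */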
405 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
406 {
407         struct vduse_dev *dev = file->private_data;
408         __poll_t mask = 0;
409
410         poll_wait(file, &dev->waitq, wait);
411
412         spin_lock(&dev->msg_lock);
413
414         if (unlikely(dev->broken))
415                 mask |= EPOLLERR;
416         if (!list_empty(&dev->send_list))
417                 mask |= EPOLLIN | EPOLLRDNORM;
418         if (!list_empty(&dev->recv_list))
419                 mask |= EPOLLOUT | EPOLLWRNORM;
420
421         spin_unlock(&dev->msg_lock);
422
423         return mask;
424 }
425
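/*
 * Bring the device back to its initial state: clear status and driver
 * features, drop the config and virtqueue callbacks and kickfds, and
 * flush any in-flight injection/kick work. Called on virtio reset and
 * before the device is destroyed.
 */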
426 static void vduse_dev_reset(struct vduse_dev *dev)
427 {
428         int i;
429         struct vduse_iova_domain *domain = dev->domain;
430
431         /* The coherent mappings are handled in vduse_dev_free_coherent() */
432         if (domain && domain->bounce_map)
433                 vduse_domain_reset_bounce_map(domain);
434
435         down_write(&dev->rwsem);
436
437         dev->status = 0;
438         dev->driver_features = 0;
439         dev->generation++;
440         spin_lock(&dev->irq_lock);
441         dev->config_cb.callback = NULL;
442         dev->config_cb.private = NULL;
443         spin_unlock(&dev->irq_lock);
444         flush_work(&dev->inject);
445
446         for (i = 0; i < dev->vq_num; i++) {
447                 struct vduse_virtqueue *vq = dev->vqs[i];
448
449                 vq->ready = false;
450                 vq->desc_addr = 0;
451                 vq->driver_addr = 0;
452                 vq->device_addr = 0;
453                 vq->num = 0;
454                 memset(&vq->state, 0, sizeof(vq->state));
455
456                 spin_lock(&vq->kick_lock);
457                 vq->kicked = false;
458                 if (vq->kickfd)
459                         eventfd_ctx_put(vq->kickfd);
460                 vq->kickfd = NULL;
461                 spin_unlock(&vq->kick_lock);
462
463                 spin_lock(&vq->irq_lock);
464                 vq->cb.callback = NULL;
465                 vq->cb.private = NULL;
466                 vq->cb.trigger = NULL;
467                 spin_unlock(&vq->irq_lock);
468                 flush_work(&vq->inject);
469                 flush_work(&vq->kick);
470         }
471
472         up_write(&dev->rwsem);
473 }
474
475 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
476                                 u64 desc_area, u64 driver_area,
477                                 u64 device_area)
478 {
479         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
480         struct vduse_virtqueue *vq = dev->vqs[idx];
481
482         vq->desc_addr = desc_area;
483         vq->driver_addr = driver_area;
484         vq->device_addr = device_area;
485
486         return 0;
487 }
488
489 static void vduse_vq_kick(struct vduse_virtqueue *vq)
490 {
491         spin_lock(&vq->kick_lock);
492         if (!vq->ready)
493                 goto unlock;
494
495         if (vq->kickfd)
496                 eventfd_signal(vq->kickfd);
497         else
498                 vq->kicked = true;
499 unlock:
500         spin_unlock(&vq->kick_lock);
501 }
502
503 static void vduse_vq_kick_work(struct work_struct *work)
504 {
505         struct vduse_virtqueue *vq = container_of(work,
506                                         struct vduse_virtqueue, kick);
507
508         vduse_vq_kick(vq);
509 }
510
511 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
512 {
513         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
514         struct vduse_virtqueue *vq = dev->vqs[idx];
515
516         if (!eventfd_signal_allowed()) {
517                 schedule_work(&vq->kick);
518                 return;
519         }
520         vduse_vq_kick(vq);
521 }
522
523 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
524                               struct vdpa_callback *cb)
525 {
526         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
527         struct vduse_virtqueue *vq = dev->vqs[idx];
528
529         spin_lock(&vq->irq_lock);
530         vq->cb.callback = cb->callback;
531         vq->cb.private = cb->private;
532         vq->cb.trigger = cb->trigger;
533         spin_unlock(&vq->irq_lock);
534 }
535
536 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
537 {
538         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
539         struct vduse_virtqueue *vq = dev->vqs[idx];
540
541         vq->num = num;
542 }
543
544 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
545                                         u16 idx, bool ready)
546 {
547         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
548         struct vduse_virtqueue *vq = dev->vqs[idx];
549
550         vq->ready = ready;
551 }
552
553 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
554 {
555         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
556         struct vduse_virtqueue *vq = dev->vqs[idx];
557
558         return vq->ready;
559 }
560
561 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
562                                 const struct vdpa_vq_state *state)
563 {
564         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
565         struct vduse_virtqueue *vq = dev->vqs[idx];
566
567         if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
568                 vq->state.packed.last_avail_counter =
569                                 state->packed.last_avail_counter;
570                 vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
571                 vq->state.packed.last_used_counter =
572                                 state->packed.last_used_counter;
573                 vq->state.packed.last_used_idx = state->packed.last_used_idx;
574         } else
575                 vq->state.split.avail_index = state->split.avail_index;
576
577         return 0;
578 }
579
580 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
581                                 struct vdpa_vq_state *state)
582 {
583         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
584         struct vduse_virtqueue *vq = dev->vqs[idx];
585
586         if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
587                 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
588
589         return vduse_dev_get_vq_state_split(dev, vq, &state->split);
590 }
591
592 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
593 {
594         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
595
596         return dev->vq_align;
597 }
598
599 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
600 {
601         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
602
603         return dev->device_features;
604 }
605
606 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
607 {
608         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
609
610         dev->driver_features = features;
611         return 0;
612 }
613
614 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
615 {
616         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
617
618         return dev->driver_features;
619 }
620
621 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
622                                   struct vdpa_callback *cb)
623 {
624         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
625
626         spin_lock(&dev->irq_lock);
627         dev->config_cb.callback = cb->callback;
628         dev->config_cb.private = cb->private;
629         spin_unlock(&dev->irq_lock);
630 }
631
632 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
633 {
634         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
635         u16 num_max = 0;
636         int i;
637
638         for (i = 0; i < dev->vq_num; i++)
639                 if (num_max < dev->vqs[i]->num_max)
640                         num_max = dev->vqs[i]->num_max;
641
642         return num_max;
643 }
644
645 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
646 {
647         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
648
649         return dev->device_id;
650 }
651
652 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
653 {
654         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
655
656         return dev->vendor_id;
657 }
658
659 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
660 {
661         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
662
663         return dev->status;
664 }
665
666 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
667 {
668         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
669
670         if (vduse_dev_set_status(dev, status))
671                 return;
672
673         dev->status = status;
674 }
675
676 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
677 {
678         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
679
680         return dev->config_size;
681 }
682
683 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
684                                   void *buf, unsigned int len)
685 {
686         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
687
688         /* Initialize the buffer in case of partial copy. */
689         memset(buf, 0, len);
690
691         if (offset > dev->config_size)
692                 return;
693
694         if (len > dev->config_size - offset)
695                 len = dev->config_size - offset;
696
697         memcpy(buf, dev->config + offset, len);
698 }
699
700 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
701                         const void *buf, unsigned int len)
702 {
703         /* Now we only support read-only configuration space */
704 }
705
706 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
707 {
708         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
709         int ret = vduse_dev_set_status(dev, 0);
710
711         vduse_dev_reset(dev);
712
713         return ret;
714 }
715
716 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
717 {
718         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
719
720         return dev->generation;
721 }
722
723 static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
724                                       const struct cpumask *cpu_mask)
725 {
726         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
727
728         if (cpu_mask)
729                 cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
730         else
731                 cpumask_setall(&dev->vqs[idx]->irq_affinity);
732
733         return 0;
734 }
735
736 static const struct cpumask *
737 vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
738 {
739         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
740
741         return &dev->vqs[idx]->irq_affinity;
742 }
743
744 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
745                                 unsigned int asid,
746                                 struct vhost_iotlb *iotlb)
747 {
748         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
749         int ret;
750
751         ret = vduse_domain_set_map(dev->domain, iotlb);
752         if (ret)
753                 return ret;
754
755         ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
756         if (ret) {
757                 vduse_domain_clear_map(dev->domain, iotlb);
758                 return ret;
759         }
760
761         return 0;
762 }
763
764 static void vduse_vdpa_free(struct vdpa_device *vdpa)
765 {
766         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
767
768         dev->vdev = NULL;
769 }
770
771 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
772         .set_vq_address         = vduse_vdpa_set_vq_address,
773         .kick_vq                = vduse_vdpa_kick_vq,
774         .set_vq_cb              = vduse_vdpa_set_vq_cb,
775         .set_vq_num             = vduse_vdpa_set_vq_num,
776         .set_vq_ready           = vduse_vdpa_set_vq_ready,
777         .get_vq_ready           = vduse_vdpa_get_vq_ready,
778         .set_vq_state           = vduse_vdpa_set_vq_state,
779         .get_vq_state           = vduse_vdpa_get_vq_state,
780         .get_vq_align           = vduse_vdpa_get_vq_align,
781         .get_device_features    = vduse_vdpa_get_device_features,
782         .set_driver_features    = vduse_vdpa_set_driver_features,
783         .get_driver_features    = vduse_vdpa_get_driver_features,
784         .set_config_cb          = vduse_vdpa_set_config_cb,
785         .get_vq_num_max         = vduse_vdpa_get_vq_num_max,
786         .get_device_id          = vduse_vdpa_get_device_id,
787         .get_vendor_id          = vduse_vdpa_get_vendor_id,
788         .get_status             = vduse_vdpa_get_status,
789         .set_status             = vduse_vdpa_set_status,
790         .get_config_size        = vduse_vdpa_get_config_size,
791         .get_config             = vduse_vdpa_get_config,
792         .set_config             = vduse_vdpa_set_config,
793         .get_generation         = vduse_vdpa_get_generation,
794         .set_vq_affinity        = vduse_vdpa_set_vq_affinity,
795         .get_vq_affinity        = vduse_vdpa_get_vq_affinity,
796         .reset                  = vduse_vdpa_reset,
797         .set_map                = vduse_vdpa_set_map,
798         .free                   = vduse_vdpa_free,
799 };
800
801 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
802                                      unsigned long offset, size_t size,
803                                      enum dma_data_direction dir,
804                                      unsigned long attrs)
805 {
806         struct vduse_dev *vdev = dev_to_vduse(dev);
807         struct vduse_iova_domain *domain = vdev->domain;
808
809         return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
810 }
811
812 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
813                                 size_t size, enum dma_data_direction dir,
814                                 unsigned long attrs)
815 {
816         struct vduse_dev *vdev = dev_to_vduse(dev);
817         struct vduse_iova_domain *domain = vdev->domain;
818
819         return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
820 }
821
822 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
823                                         dma_addr_t *dma_addr, gfp_t flag,
824                                         unsigned long attrs)
825 {
826         struct vduse_dev *vdev = dev_to_vduse(dev);
827         struct vduse_iova_domain *domain = vdev->domain;
828         unsigned long iova;
829         void *addr;
830
831         *dma_addr = DMA_MAPPING_ERROR;
832         addr = vduse_domain_alloc_coherent(domain, size,
833                                 (dma_addr_t *)&iova, flag, attrs);
834         if (!addr)
835                 return NULL;
836
837         *dma_addr = (dma_addr_t)iova;
838
839         return addr;
840 }
841
842 static void vduse_dev_free_coherent(struct device *dev, size_t size,
843                                         void *vaddr, dma_addr_t dma_addr,
844                                         unsigned long attrs)
845 {
846         struct vduse_dev *vdev = dev_to_vduse(dev);
847         struct vduse_iova_domain *domain = vdev->domain;
848
849         vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
850 }
851
852 static size_t vduse_dev_max_mapping_size(struct device *dev)
853 {
854         struct vduse_dev *vdev = dev_to_vduse(dev);
855         struct vduse_iova_domain *domain = vdev->domain;
856
857         return domain->bounce_size;
858 }
859
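/*
 * DMA ops installed for the vDPA device: all mappings go through the
 * VDUSE IOVA domain, so virtio buffers end up in the bounce area (or
 * the registered umem) that the userspace daemon can reach through the
 * VDUSE_IOTLB_GET_FD interface.
 */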
860 static const struct dma_map_ops vduse_dev_dma_ops = {
861         .map_page = vduse_dev_map_page,
862         .unmap_page = vduse_dev_unmap_page,
863         .alloc = vduse_dev_alloc_coherent,
864         .free = vduse_dev_free_coherent,
865         .max_mapping_size = vduse_dev_max_mapping_size,
866 };
867
868 static unsigned int perm_to_file_flags(u8 perm)
869 {
870         unsigned int flags = 0;
871
872         switch (perm) {
873         case VDUSE_ACCESS_WO:
874                 flags |= O_WRONLY;
875                 break;
876         case VDUSE_ACCESS_RO:
877                 flags |= O_RDONLY;
878                 break;
879         case VDUSE_ACCESS_RW:
880                 flags |= O_RDWR;
881                 break;
882         default:
883                 WARN(1, "invalid vhost IOTLB permission\n");
884                 break;
885         }
886
887         return flags;
888 }
889
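/*
 * Associate (or tear down, with fd == VDUSE_EVENTFD_DEASSIGN) an
 * eventfd with a virtqueue. The kernel signals this eventfd whenever
 * the virtio driver kicks the queue; a kick that happened before the
 * eventfd was installed is replayed immediately.
 *
 * Illustrative setup from userspace (sketch only):
 *
 *	struct vduse_vq_eventfd efd = {
 *		.index = 0,
 *		.fd = eventfd(0, EFD_CLOEXEC),
 *	};
 *
 *	ioctl(dev_fd, VDUSE_VQ_SETUP_KICKFD, &efd);
 */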
890 static int vduse_kickfd_setup(struct vduse_dev *dev,
891                         struct vduse_vq_eventfd *eventfd)
892 {
893         struct eventfd_ctx *ctx = NULL;
894         struct vduse_virtqueue *vq;
895         u32 index;
896
897         if (eventfd->index >= dev->vq_num)
898                 return -EINVAL;
899
900         index = array_index_nospec(eventfd->index, dev->vq_num);
901         vq = dev->vqs[index];
902         if (eventfd->fd >= 0) {
903                 ctx = eventfd_ctx_fdget(eventfd->fd);
904                 if (IS_ERR(ctx))
905                         return PTR_ERR(ctx);
906         } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
907                 return 0;
908
909         spin_lock(&vq->kick_lock);
910         if (vq->kickfd)
911                 eventfd_ctx_put(vq->kickfd);
912         vq->kickfd = ctx;
913         if (vq->ready && vq->kicked && vq->kickfd) {
914                 eventfd_signal(vq->kickfd);
915                 vq->kicked = false;
916         }
917         spin_unlock(&vq->kick_lock);
918
919         return 0;
920 }
921
922 static bool vduse_dev_is_ready(struct vduse_dev *dev)
923 {
924         int i;
925
926         for (i = 0; i < dev->vq_num; i++)
927                 if (!dev->vqs[i]->num_max)
928                         return false;
929
930         return true;
931 }
932
933 static void vduse_dev_irq_inject(struct work_struct *work)
934 {
935         struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
936
937         spin_lock_bh(&dev->irq_lock);
938         if (dev->config_cb.callback)
939                 dev->config_cb.callback(dev->config_cb.private);
940         spin_unlock_bh(&dev->irq_lock);
941 }
942
943 static void vduse_vq_irq_inject(struct work_struct *work)
944 {
945         struct vduse_virtqueue *vq = container_of(work,
946                                         struct vduse_virtqueue, inject);
947
948         spin_lock_bh(&vq->irq_lock);
949         if (vq->ready && vq->cb.callback)
950                 vq->cb.callback(vq->cb.private);
951         spin_unlock_bh(&vq->irq_lock);
952 }
953
954 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
955 {
956         bool signal = false;
957
958         if (!vq->cb.trigger)
959                 return false;
960
961         spin_lock_irq(&vq->irq_lock);
962         if (vq->ready && vq->cb.trigger) {
963                 eventfd_signal(vq->cb.trigger);
964                 signal = true;
965         }
966         spin_unlock_irq(&vq->irq_lock);
967
968         return signal;
969 }
970
971 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
972                                     struct work_struct *irq_work,
973                                     int irq_effective_cpu)
974 {
975         int ret = -EINVAL;
976
977         down_read(&dev->rwsem);
978         if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
979                 goto unlock;
980
981         ret = 0;
982         if (irq_effective_cpu == IRQ_UNBOUND)
983                 queue_work(vduse_irq_wq, irq_work);
984         else
985                 queue_work_on(irq_effective_cpu,
986                               vduse_irq_bound_wq, irq_work);
987 unlock:
988         up_read(&dev->rwsem);
989
990         return ret;
991 }
992
993 static int vduse_dev_dereg_umem(struct vduse_dev *dev,
994                                 u64 iova, u64 size)
995 {
996         int ret;
997
998         mutex_lock(&dev->mem_lock);
999         ret = -ENOENT;
1000         if (!dev->umem)
1001                 goto unlock;
1002
1003         ret = -EINVAL;
1004         if (!dev->domain)
1005                 goto unlock;
1006
1007         if (dev->umem->iova != iova || size != dev->domain->bounce_size)
1008                 goto unlock;
1009
1010         vduse_domain_remove_user_bounce_pages(dev->domain);
1011         unpin_user_pages_dirty_lock(dev->umem->pages,
1012                                     dev->umem->npages, true);
1013         atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
1014         mmdrop(dev->umem->mm);
1015         vfree(dev->umem->pages);
1016         kfree(dev->umem);
1017         dev->umem = NULL;
1018         ret = 0;
1019 unlock:
1020         mutex_unlock(&dev->mem_lock);
1021         return ret;
1022 }
1023
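/*
 * Register userspace memory to back the bounce buffer. The region must
 * cover the whole bounce range (iova 0, size == bounce_size) and be
 * page aligned; the pages are pinned with FOLL_LONGTERM and accounted
 * against RLIMIT_MEMLOCK before they replace the kernel bounce pages.
 */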
1024 static int vduse_dev_reg_umem(struct vduse_dev *dev,
1025                               u64 iova, u64 uaddr, u64 size)
1026 {
1027         struct page **page_list = NULL;
1028         struct vduse_umem *umem = NULL;
1029         long pinned = 0;
1030         unsigned long npages, lock_limit;
1031         int ret;
1032
1033         if (!dev->domain || !dev->domain->bounce_map ||
1034             size != dev->domain->bounce_size ||
1035             iova != 0 || uaddr & ~PAGE_MASK)
1036                 return -EINVAL;
1037
1038         mutex_lock(&dev->mem_lock);
1039         ret = -EEXIST;
1040         if (dev->umem)
1041                 goto unlock;
1042
1043         ret = -ENOMEM;
1044         npages = size >> PAGE_SHIFT;
1045         page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
1046                               GFP_KERNEL_ACCOUNT);
1047         umem = kzalloc(sizeof(*umem), GFP_KERNEL);
1048         if (!page_list || !umem)
1049                 goto unlock;
1050
1051         mmap_read_lock(current->mm);
1052
1053         lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1054         if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
1055                 goto out;
1056
1057         pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
1058                                 page_list);
1059         if (pinned != npages) {
1060                 ret = pinned < 0 ? pinned : -ENOMEM;
1061                 goto out;
1062         }
1063
1064         ret = vduse_domain_add_user_bounce_pages(dev->domain,
1065                                                  page_list, pinned);
1066         if (ret)
1067                 goto out;
1068
1069         atomic64_add(npages, &current->mm->pinned_vm);
1070
1071         umem->pages = page_list;
1072         umem->npages = pinned;
1073         umem->iova = iova;
1074         umem->mm = current->mm;
1075         mmgrab(current->mm);
1076
1077         dev->umem = umem;
1078 out:
1079         if (ret && pinned > 0)
1080                 unpin_user_pages(page_list, pinned);
1081
1082         mmap_read_unlock(current->mm);
1083 unlock:
1084         if (ret) {
1085                 vfree(page_list);
1086                 kfree(umem);
1087         }
1088         mutex_unlock(&dev->mem_lock);
1089         return ret;
1090 }
1091
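/*
 * Pick the next online CPU from the virtqueue's irq_affinity mask in a
 * round-robin fashion; if none is found, fall back to IRQ_UNBOUND so
 * the injection work is queued on the unbound workqueue (vduse_irq_wq).
 */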
1092 static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
1093 {
1094         int curr_cpu = vq->irq_effective_cpu;
1095
1096         while (true) {
1097                 curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
1098                 if (cpu_online(curr_cpu))
1099                         break;
1100
1101                 if (curr_cpu >= nr_cpu_ids)
1102                         curr_cpu = IRQ_UNBOUND;
1103         }
1104
1105         vq->irq_effective_cpu = curr_cpu;
1106 }
1107
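/*
 * Per-device ioctl interface used by the userspace daemon after it has
 * opened /dev/vduse/<name>: configure virtqueues (VDUSE_VQ_SETUP),
 * install kick eventfds, inject config/vq interrupts, manage the
 * bounce-buffer umem and query IOTLB mappings.
 *
 * A typical bring-up might look like (sketch only, the max_size value
 * is illustrative):
 *
 *	struct vduse_vq_config vq_cfg = { .index = 0, .max_size = 256 };
 *
 *	ioctl(dev_fd, VDUSE_VQ_SETUP, &vq_cfg);      // once per virtqueue
 *	// register kickfds via VDUSE_VQ_SETUP_KICKFD,
 *	// then service control requests via read()/write() on dev_fd
 */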
1108 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1109                             unsigned long arg)
1110 {
1111         struct vduse_dev *dev = file->private_data;
1112         void __user *argp = (void __user *)arg;
1113         int ret;
1114
1115         if (unlikely(dev->broken))
1116                 return -EPERM;
1117
1118         switch (cmd) {
1119         case VDUSE_IOTLB_GET_FD: {
1120                 struct vduse_iotlb_entry entry;
1121                 struct vhost_iotlb_map *map;
1122                 struct vdpa_map_file *map_file;
1123                 struct file *f = NULL;
1124
1125                 ret = -EFAULT;
1126                 if (copy_from_user(&entry, argp, sizeof(entry)))
1127                         break;
1128
1129                 ret = -EINVAL;
1130                 if (entry.start > entry.last)
1131                         break;
1132
1133                 mutex_lock(&dev->domain_lock);
1134                 if (!dev->domain) {
1135                         mutex_unlock(&dev->domain_lock);
1136                         break;
1137                 }
1138                 spin_lock(&dev->domain->iotlb_lock);
1139                 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1140                                               entry.start, entry.last);
1141                 if (map) {
1142                         map_file = (struct vdpa_map_file *)map->opaque;
1143                         f = get_file(map_file->file);
1144                         entry.offset = map_file->offset;
1145                         entry.start = map->start;
1146                         entry.last = map->last;
1147                         entry.perm = map->perm;
1148                 }
1149                 spin_unlock(&dev->domain->iotlb_lock);
1150                 mutex_unlock(&dev->domain_lock);
1151                 ret = -EINVAL;
1152                 if (!f)
1153                         break;
1154
1155                 ret = -EFAULT;
1156                 if (copy_to_user(argp, &entry, sizeof(entry))) {
1157                         fput(f);
1158                         break;
1159                 }
1160                 ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
1161                 fput(f);
1162                 break;
1163         }
1164         case VDUSE_DEV_GET_FEATURES:
1165                 /*
1166                  * Just mirror what the driver wrote here.
1167                  * The driver is expected to check FEATURES_OK later.
1168                  */
1169                 ret = put_user(dev->driver_features, (u64 __user *)argp);
1170                 break;
1171         case VDUSE_DEV_SET_CONFIG: {
1172                 struct vduse_config_data config;
1173                 unsigned long size = offsetof(struct vduse_config_data,
1174                                               buffer);
1175
1176                 ret = -EFAULT;
1177                 if (copy_from_user(&config, argp, size))
1178                         break;
1179
1180                 ret = -EINVAL;
1181                 if (config.offset > dev->config_size ||
1182                     config.length == 0 ||
1183                     config.length > dev->config_size - config.offset)
1184                         break;
1185
1186                 ret = -EFAULT;
1187                 if (copy_from_user(dev->config + config.offset, argp + size,
1188                                    config.length))
1189                         break;
1190
1191                 ret = 0;
1192                 break;
1193         }
1194         case VDUSE_DEV_INJECT_CONFIG_IRQ:
1195                 ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
1196                 break;
1197         case VDUSE_VQ_SETUP: {
1198                 struct vduse_vq_config config;
1199                 u32 index;
1200
1201                 ret = -EFAULT;
1202                 if (copy_from_user(&config, argp, sizeof(config)))
1203                         break;
1204
1205                 ret = -EINVAL;
1206                 if (config.index >= dev->vq_num)
1207                         break;
1208
1209                 if (!is_mem_zero((const char *)config.reserved,
1210                                  sizeof(config.reserved)))
1211                         break;
1212
1213                 index = array_index_nospec(config.index, dev->vq_num);
1214                 dev->vqs[index]->num_max = config.max_size;
1215                 ret = 0;
1216                 break;
1217         }
1218         case VDUSE_VQ_GET_INFO: {
1219                 struct vduse_vq_info vq_info;
1220                 struct vduse_virtqueue *vq;
1221                 u32 index;
1222
1223                 ret = -EFAULT;
1224                 if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1225                         break;
1226
1227                 ret = -EINVAL;
1228                 if (vq_info.index >= dev->vq_num)
1229                         break;
1230
1231                 index = array_index_nospec(vq_info.index, dev->vq_num);
1232                 vq = dev->vqs[index];
1233                 vq_info.desc_addr = vq->desc_addr;
1234                 vq_info.driver_addr = vq->driver_addr;
1235                 vq_info.device_addr = vq->device_addr;
1236                 vq_info.num = vq->num;
1237
1238                 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1239                         vq_info.packed.last_avail_counter =
1240                                 vq->state.packed.last_avail_counter;
1241                         vq_info.packed.last_avail_idx =
1242                                 vq->state.packed.last_avail_idx;
1243                         vq_info.packed.last_used_counter =
1244                                 vq->state.packed.last_used_counter;
1245                         vq_info.packed.last_used_idx =
1246                                 vq->state.packed.last_used_idx;
1247                 } else
1248                         vq_info.split.avail_index =
1249                                 vq->state.split.avail_index;
1250
1251                 vq_info.ready = vq->ready;
1252
1253                 ret = -EFAULT;
1254                 if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1255                         break;
1256
1257                 ret = 0;
1258                 break;
1259         }
1260         case VDUSE_VQ_SETUP_KICKFD: {
1261                 struct vduse_vq_eventfd eventfd;
1262
1263                 ret = -EFAULT;
1264                 if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1265                         break;
1266
1267                 ret = vduse_kickfd_setup(dev, &eventfd);
1268                 break;
1269         }
1270         case VDUSE_VQ_INJECT_IRQ: {
1271                 u32 index;
1272
1273                 ret = -EFAULT;
1274                 if (get_user(index, (u32 __user *)argp))
1275                         break;
1276
1277                 ret = -EINVAL;
1278                 if (index >= dev->vq_num)
1279                         break;
1280
1281                 ret = 0;
1282                 index = array_index_nospec(index, dev->vq_num);
1283                 if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
1284                         vduse_vq_update_effective_cpu(dev->vqs[index]);
1285                         ret = vduse_dev_queue_irq_work(dev,
1286                                                 &dev->vqs[index]->inject,
1287                                                 dev->vqs[index]->irq_effective_cpu);
1288                 }
1289                 break;
1290         }
1291         case VDUSE_IOTLB_REG_UMEM: {
1292                 struct vduse_iova_umem umem;
1293
1294                 ret = -EFAULT;
1295                 if (copy_from_user(&umem, argp, sizeof(umem)))
1296                         break;
1297
1298                 ret = -EINVAL;
1299                 if (!is_mem_zero((const char *)umem.reserved,
1300                                  sizeof(umem.reserved)))
1301                         break;
1302
1303                 mutex_lock(&dev->domain_lock);
1304                 ret = vduse_dev_reg_umem(dev, umem.iova,
1305                                          umem.uaddr, umem.size);
1306                 mutex_unlock(&dev->domain_lock);
1307                 break;
1308         }
1309         case VDUSE_IOTLB_DEREG_UMEM: {
1310                 struct vduse_iova_umem umem;
1311
1312                 ret = -EFAULT;
1313                 if (copy_from_user(&umem, argp, sizeof(umem)))
1314                         break;
1315
1316                 ret = -EINVAL;
1317                 if (!is_mem_zero((const char *)umem.reserved,
1318                                  sizeof(umem.reserved)))
1319                         break;
1320                 mutex_lock(&dev->domain_lock);
1321                 ret = vduse_dev_dereg_umem(dev, umem.iova,
1322                                            umem.size);
1323                 mutex_unlock(&dev->domain_lock);
1324                 break;
1325         }
1326         case VDUSE_IOTLB_GET_INFO: {
1327                 struct vduse_iova_info info;
1328                 struct vhost_iotlb_map *map;
1329
1330                 ret = -EFAULT;
1331                 if (copy_from_user(&info, argp, sizeof(info)))
1332                         break;
1333
1334                 ret = -EINVAL;
1335                 if (info.start > info.last)
1336                         break;
1337
1338                 if (!is_mem_zero((const char *)info.reserved,
1339                                  sizeof(info.reserved)))
1340                         break;
1341
1342                 mutex_lock(&dev->domain_lock);
1343                 if (!dev->domain) {
1344                         mutex_unlock(&dev->domain_lock);
1345                         break;
1346                 }
1347                 spin_lock(&dev->domain->iotlb_lock);
1348                 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1349                                               info.start, info.last);
1350                 if (map) {
1351                         info.start = map->start;
1352                         info.last = map->last;
1353                         info.capability = 0;
1354                         if (dev->domain->bounce_map && map->start == 0 &&
1355                             map->last == dev->domain->bounce_size - 1)
1356                                 info.capability |= VDUSE_IOVA_CAP_UMEM;
1357                 }
1358                 spin_unlock(&dev->domain->iotlb_lock);
1359                 mutex_unlock(&dev->domain_lock);
1360                 if (!map)
1361                         break;
1362
1363                 ret = -EFAULT;
1364                 if (copy_to_user(argp, &info, sizeof(info)))
1365                         break;
1366
1367                 ret = 0;
1368                 break;
1369         }
1370         default:
1371                 ret = -ENOIOCTLCMD;
1372                 break;
1373         }
1374
1375         return ret;
1376 }
1377
1378 static int vduse_dev_release(struct inode *inode, struct file *file)
1379 {
1380         struct vduse_dev *dev = file->private_data;
1381
1382         mutex_lock(&dev->domain_lock);
1383         if (dev->domain)
1384                 vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1385         mutex_unlock(&dev->domain_lock);
1386         spin_lock(&dev->msg_lock);
1387         /* Make sure the in-flight messages can be processed after reconnection */
1388         list_splice_init(&dev->recv_list, &dev->send_list);
1389         spin_unlock(&dev->msg_lock);
1390         dev->connected = false;
1391
1392         return 0;
1393 }
1394
1395 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1396 {
1397         struct vduse_dev *dev;
1398
1399         mutex_lock(&vduse_lock);
1400         dev = idr_find(&vduse_idr, minor);
1401         mutex_unlock(&vduse_lock);
1402
1403         return dev;
1404 }
1405
1406 static int vduse_dev_open(struct inode *inode, struct file *file)
1407 {
1408         int ret;
1409         struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1410
1411         if (!dev)
1412                 return -ENODEV;
1413
1414         ret = -EBUSY;
1415         mutex_lock(&dev->lock);
1416         if (dev->connected)
1417                 goto unlock;
1418
1419         ret = 0;
1420         dev->connected = true;
1421         file->private_data = dev;
1422 unlock:
1423         mutex_unlock(&dev->lock);
1424
1425         return ret;
1426 }
1427
1428 static const struct file_operations vduse_dev_fops = {
1429         .owner          = THIS_MODULE,
1430         .open           = vduse_dev_open,
1431         .release        = vduse_dev_release,
1432         .read_iter      = vduse_dev_read_iter,
1433         .write_iter     = vduse_dev_write_iter,
1434         .poll           = vduse_dev_poll,
1435         .unlocked_ioctl = vduse_dev_ioctl,
1436         .compat_ioctl   = compat_ptr_ioctl,
1437         .llseek         = noop_llseek,
1438 };
1439
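/*
 * Per-virtqueue sysfs knob exposing the IRQ callback affinity mask.
 * Writing a hex CPU mask (the format accepted by cpumask_parse())
 * restricts which CPUs the injection work may be queued on, e.g. for
 * CPUs 0-3 (the exact path depends on the device name):
 *
 *	echo f > /sys/class/vduse/<name>/vq0/irq_cb_affinity
 */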
1440 static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
1441 {
1442         return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
1443 }
1444
1445 static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
1446                                      const char *buf, size_t count)
1447 {
1448         cpumask_var_t new_value;
1449         int ret;
1450
1451         if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
1452                 return -ENOMEM;
1453
1454         ret = cpumask_parse(buf, new_value);
1455         if (ret)
1456                 goto free_mask;
1457
1458         ret = -EINVAL;
1459         if (!cpumask_intersects(new_value, cpu_online_mask))
1460                 goto free_mask;
1461
1462         cpumask_copy(&vq->irq_affinity, new_value);
1463         ret = count;
1464 free_mask:
1465         free_cpumask_var(new_value);
1466         return ret;
1467 }
1468
1469 struct vq_sysfs_entry {
1470         struct attribute attr;
1471         ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
1472         ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
1473                          size_t count);
1474 };
1475
1476 static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
1477
1478 static struct attribute *vq_attrs[] = {
1479         &irq_cb_affinity_attr.attr,
1480         NULL,
1481 };
1482 ATTRIBUTE_GROUPS(vq);
1483
1484 static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
1485                             char *buf)
1486 {
1487         struct vduse_virtqueue *vq = container_of(kobj,
1488                                         struct vduse_virtqueue, kobj);
1489         struct vq_sysfs_entry *entry = container_of(attr,
1490                                         struct vq_sysfs_entry, attr);
1491
1492         if (!entry->show)
1493                 return -EIO;
1494
1495         return entry->show(vq, buf);
1496 }
1497
1498 static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
1499                              const char *buf, size_t count)
1500 {
1501         struct vduse_virtqueue *vq = container_of(kobj,
1502                                         struct vduse_virtqueue, kobj);
1503         struct vq_sysfs_entry *entry = container_of(attr,
1504                                         struct vq_sysfs_entry, attr);
1505
1506         if (!entry->store)
1507                 return -EIO;
1508
1509         return entry->store(vq, buf, count);
1510 }
1511
1512 static const struct sysfs_ops vq_sysfs_ops = {
1513         .show = vq_attr_show,
1514         .store = vq_attr_store,
1515 };
1516
1517 static void vq_release(struct kobject *kobj)
1518 {
1519         struct vduse_virtqueue *vq = container_of(kobj,
1520                                         struct vduse_virtqueue, kobj);
1521         kfree(vq);
1522 }
1523
1524 static const struct kobj_type vq_type = {
1525         .release        = vq_release,
1526         .sysfs_ops      = &vq_sysfs_ops,
1527         .default_groups = vq_groups,
1528 };
1529
1530 static char *vduse_devnode(const struct device *dev, umode_t *mode)
1531 {
1532         return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
1533 }
1534
1535 static const struct class vduse_class = {
1536         .name = "vduse",
1537         .devnode = vduse_devnode,
1538 };
1539
1540 static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
1541 {
1542         int i;
1543
1544         if (!dev->vqs)
1545                 return;
1546
1547         for (i = 0; i < dev->vq_num; i++)
1548                 kobject_put(&dev->vqs[i]->kobj);
1549         kfree(dev->vqs);
1550 }
1551
1552 static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
1553 {
1554         int ret, i;
1555
1556         dev->vq_align = vq_align;
1557         dev->vq_num = vq_num;
1558         dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1559         if (!dev->vqs)
1560                 return -ENOMEM;
1561
1562         for (i = 0; i < vq_num; i++) {
1563                 dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
1564                 if (!dev->vqs[i]) {
1565                         ret = -ENOMEM;
1566                         goto err;
1567                 }
1568
1569                 dev->vqs[i]->index = i;
1570                 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
1571                 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
1572                 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
1573                 spin_lock_init(&dev->vqs[i]->kick_lock);
1574                 spin_lock_init(&dev->vqs[i]->irq_lock);
1575                 cpumask_setall(&dev->vqs[i]->irq_affinity);
1576
1577                 kobject_init(&dev->vqs[i]->kobj, &vq_type);
1578                 ret = kobject_add(&dev->vqs[i]->kobj,
1579                                   &dev->dev->kobj, "vq%d", i);
1580                 if (ret) {
1581                         kfree(dev->vqs[i]);
1582                         goto err;
1583                 }
1584         }
1585
1586         return 0;
1587 err:
1588         while (i--)
1589                 kobject_put(&dev->vqs[i]->kobj);
1590         kfree(dev->vqs);
1591         dev->vqs = NULL;
1592         return ret;
1593 }
1594
1595 static struct vduse_dev *vduse_dev_create(void)
1596 {
1597         struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1598
1599         if (!dev)
1600                 return NULL;
1601
1602         mutex_init(&dev->lock);
1603         mutex_init(&dev->mem_lock);
1604         mutex_init(&dev->domain_lock);
1605         spin_lock_init(&dev->msg_lock);
1606         INIT_LIST_HEAD(&dev->send_list);
1607         INIT_LIST_HEAD(&dev->recv_list);
1608         spin_lock_init(&dev->irq_lock);
1609         init_rwsem(&dev->rwsem);
1610
1611         INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1612         init_waitqueue_head(&dev->waitq);
1613
1614         return dev;
1615 }
1616
1617 static void vduse_dev_destroy(struct vduse_dev *dev)
1618 {
1619         kfree(dev);
1620 }
1621
1622 static struct vduse_dev *vduse_find_dev(const char *name)
1623 {
1624         struct vduse_dev *dev;
1625         int id;
1626
1627         idr_for_each_entry(&vduse_idr, dev, id)
1628                 if (!strcmp(dev->name, name))
1629                         return dev;
1630
1631         return NULL;
1632 }
1633
1634 static int vduse_destroy_dev(char *name)
1635 {
1636         struct vduse_dev *dev = vduse_find_dev(name);
1637
1638         if (!dev)
1639                 return -EINVAL;
1640
1641         mutex_lock(&dev->lock);
1642         if (dev->vdev || dev->connected) {
1643                 mutex_unlock(&dev->lock);
1644                 return -EBUSY;
1645         }
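        /* Block further opens of the char device while tearing down */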
1646         dev->connected = true;
1647         mutex_unlock(&dev->lock);
1648
1649         vduse_dev_reset(dev);
1650         device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1651         idr_remove(&vduse_idr, dev->minor);
1652         kvfree(dev->config);
1653         vduse_dev_deinit_vqs(dev);
1654         if (dev->domain)
1655                 vduse_domain_destroy(dev->domain);
1656         kfree(dev->name);
1657         vduse_dev_destroy(dev);
1658         module_put(THIS_MODULE);
1659
1660         return 0;
1661 }
1662
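/* Only virtio device types listed in allowed_device_id may be emulated. */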
1663 static bool device_is_allowed(u32 device_id)
1664 {
1665         int i;
1666
1667         for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1668                 if (allowed_device_id[i] == device_id)
1669                         return true;
1670
1671         return false;
1672 }
1673
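/*
 * Userspace must offer VIRTIO_F_ACCESS_PLATFORM so that buffer accesses go
 * through the VDUSE DMA/bounce machinery, and must not offer features that
 * would require a writable configuration space.
 */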
1674 static bool features_is_valid(u64 features)
1675 {
1676         if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
1677                 return false;
1678
1679         /* For now we only support a read-only configuration space */
1680         if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
1681                 return false;
1682
1683         return true;
1684 }
1685
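/*
 * Sanity-check a VDUSE_CREATE_DEV request: reserved bytes must be zero,
 * vq_align and config_size are bounded by PAGE_SIZE, vq_num by 0xffff,
 * the name must be non-empty, and the device id and features allowed.
 */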
1686 static bool vduse_validate_config(struct vduse_dev_config *config)
1687 {
1688         if (!is_mem_zero((const char *)config->reserved,
1689                          sizeof(config->reserved)))
1690                 return false;
1691
1692         if (config->vq_align > PAGE_SIZE)
1693                 return false;
1694
1695         if (config->config_size > PAGE_SIZE)
1696                 return false;
1697
1698         if (config->vq_num > 0xffff)
1699                 return false;
1700
1701         if (!config->name[0])
1702                 return false;
1703
1704         if (!device_is_allowed(config->device_id))
1705                 return false;
1706
1707         if (!features_is_valid(config->features))
1708                 return false;
1709
1710         return true;
1711 }
1712
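/*
 * msg_timeout (sysfs, RW): how long, in seconds, the kernel waits for
 * userspace to answer a control message before giving up on the device.
 */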
1713 static ssize_t msg_timeout_show(struct device *device,
1714                                 struct device_attribute *attr, char *buf)
1715 {
1716         struct vduse_dev *dev = dev_get_drvdata(device);
1717
1718         return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1719 }
1720
1721 static ssize_t msg_timeout_store(struct device *device,
1722                                  struct device_attribute *attr,
1723                                  const char *buf, size_t count)
1724 {
1725         struct vduse_dev *dev = dev_get_drvdata(device);
1726         int ret;
1727
1728         ret = kstrtouint(buf, 10, &dev->msg_timeout);
1729         if (ret < 0)
1730                 return ret;
1731
1732         return count;
1733 }
1734
1735 static DEVICE_ATTR_RW(msg_timeout);
1736
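/*
 * bounce_size (sysfs, RW): size of the bounce buffer backing the IOVA
 * domain.  It can only be changed before the domain is created, must lie
 * within [VDUSE_MIN_BOUNCE_SIZE, VDUSE_MAX_BOUNCE_SIZE], and is rounded
 * down to a page boundary.
 */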
1737 static ssize_t bounce_size_show(struct device *device,
1738                                 struct device_attribute *attr, char *buf)
1739 {
1740         struct vduse_dev *dev = dev_get_drvdata(device);
1741
1742         return sysfs_emit(buf, "%u\n", dev->bounce_size);
1743 }
1744
1745 static ssize_t bounce_size_store(struct device *device,
1746                                  struct device_attribute *attr,
1747                                  const char *buf, size_t count)
1748 {
1749         struct vduse_dev *dev = dev_get_drvdata(device);
1750         unsigned int bounce_size;
1751         int ret;
1752
1753         ret = -EPERM;
1754         mutex_lock(&dev->domain_lock);
1755         if (dev->domain)
1756                 goto unlock;
1757
1758         ret = kstrtouint(buf, 10, &bounce_size);
1759         if (ret < 0)
1760                 goto unlock;
1761
1762         ret = -EINVAL;
1763         if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1764             bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1765                 goto unlock;
1766
1767         dev->bounce_size = bounce_size & PAGE_MASK;
1768         ret = count;
1769 unlock:
1770         mutex_unlock(&dev->domain_lock);
1771         return ret;
1772 }
1773
1774 static DEVICE_ATTR_RW(bounce_size);
1775
1776 static struct attribute *vduse_dev_attrs[] = {
1777         &dev_attr_msg_timeout.attr,
1778         &dev_attr_bounce_size.attr,
1779         NULL
1780 };
1781
1782 ATTRIBUTE_GROUPS(vduse_dev);
1783
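/*
 * Create the kernel side of a VDUSE device: allocate the vduse_dev, take
 * ownership of the caller-provided config-space buffer, reserve an IDR
 * slot that doubles as the char device minor, create /dev/vduse/$NAME with
 * its sysfs attributes, and set up the virtqueues.  A module reference is
 * held until vduse_destroy_dev().
 */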
1784 static int vduse_create_dev(struct vduse_dev_config *config,
1785                             void *config_buf, u64 api_version)
1786 {
1787         int ret;
1788         struct vduse_dev *dev;
1789
1790         ret = -EEXIST;
1791         if (vduse_find_dev(config->name))
1792                 goto err;
1793
1794         ret = -ENOMEM;
1795         dev = vduse_dev_create();
1796         if (!dev)
1797                 goto err;
1798
1799         dev->api_version = api_version;
1800         dev->device_features = config->features;
1801         dev->device_id = config->device_id;
1802         dev->vendor_id = config->vendor_id;
1803         dev->name = kstrdup(config->name, GFP_KERNEL);
1804         if (!dev->name)
1805                 goto err_str;
1806
1807         dev->bounce_size = VDUSE_BOUNCE_SIZE;
1808         dev->config = config_buf;
1809         dev->config_size = config->config_size;
1810
1811         ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1812         if (ret < 0)
1813                 goto err_idr;
1814
1815         dev->minor = ret;
1816         dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1817         dev->dev = device_create_with_groups(&vduse_class, NULL,
1818                                 MKDEV(MAJOR(vduse_major), dev->minor),
1819                                 dev, vduse_dev_groups, "%s", config->name);
1820         if (IS_ERR(dev->dev)) {
1821                 ret = PTR_ERR(dev->dev);
1822                 goto err_dev;
1823         }
1824
1825         ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
1826         if (ret)
1827                 goto err_vqs;
1828
1829         __module_get(THIS_MODULE);
1830
1831         return 0;
1832 err_vqs:
1833         device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1834 err_dev:
1835         idr_remove(&vduse_idr, dev->minor);
1836 err_idr:
1837         kfree(dev->name);
1838 err_str:
1839         vduse_dev_destroy(dev);
1840 err:
1841         return ret;
1842 }
1843
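/*
 * ioctls on /dev/vduse/control, all serialized by vduse_lock:
 * VDUSE_GET_API_VERSION/VDUSE_SET_API_VERSION negotiate the API version
 * for this file, VDUSE_CREATE_DEV/VDUSE_DESTROY_DEV create and destroy
 * VDUSE instances.  As a rough userspace sketch (not part of this driver),
 * a VDUSE daemon would typically do:
 *
 *	fd = open("/dev/vduse/control", O_RDWR);
 *	version = VDUSE_API_VERSION;
 *	ioctl(fd, VDUSE_SET_API_VERSION, &version);
 *	ioctl(fd, VDUSE_CREATE_DEV, config);
 *
 * where 'config' points to a struct vduse_dev_config followed by
 * config_size bytes of config space, and then open /dev/vduse/$NAME to
 * service the device.
 */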
1844 static long vduse_ioctl(struct file *file, unsigned int cmd,
1845                         unsigned long arg)
1846 {
1847         int ret;
1848         void __user *argp = (void __user *)arg;
1849         struct vduse_control *control = file->private_data;
1850
1851         mutex_lock(&vduse_lock);
1852         switch (cmd) {
1853         case VDUSE_GET_API_VERSION:
1854                 ret = put_user(control->api_version, (u64 __user *)argp);
1855                 break;
1856         case VDUSE_SET_API_VERSION: {
1857                 u64 api_version;
1858
1859                 ret = -EFAULT;
1860                 if (get_user(api_version, (u64 __user *)argp))
1861                         break;
1862
1863                 ret = -EINVAL;
1864                 if (api_version > VDUSE_API_VERSION)
1865                         break;
1866
1867                 ret = 0;
1868                 control->api_version = api_version;
1869                 break;
1870         }
1871         case VDUSE_CREATE_DEV: {
1872                 struct vduse_dev_config config;
1873                 unsigned long size = offsetof(struct vduse_dev_config, config);
1874                 void *buf;
1875
1876                 ret = -EFAULT;
1877                 if (copy_from_user(&config, argp, size))
1878                         break;
1879
1880                 ret = -EINVAL;
1881                 if (!vduse_validate_config(&config))
1882                         break;
1883
1884                 buf = vmemdup_user(argp + size, config.config_size);
1885                 if (IS_ERR(buf)) {
1886                         ret = PTR_ERR(buf);
1887                         break;
1888                 }
1889                 config.name[VDUSE_NAME_MAX - 1] = '\0';
1890                 ret = vduse_create_dev(&config, buf, control->api_version);
1891                 if (ret)
1892                         kvfree(buf);
1893                 break;
1894         }
1895         case VDUSE_DESTROY_DEV: {
1896                 char name[VDUSE_NAME_MAX];
1897
1898                 ret = -EFAULT;
1899                 if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1900                         break;
1901
1902                 name[VDUSE_NAME_MAX - 1] = '\0';
1903                 ret = vduse_destroy_dev(name);
1904                 break;
1905         }
1906         default:
1907                 ret = -EINVAL;
1908                 break;
1909         }
1910         mutex_unlock(&vduse_lock);
1911
1912         return ret;
1913 }
1914
1915 static int vduse_release(struct inode *inode, struct file *file)
1916 {
1917         struct vduse_control *control = file->private_data;
1918
1919         kfree(control);
1920         return 0;
1921 }
1922
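/*
 * Each open of the control device gets its own vduse_control, so the
 * negotiated API version is per file descriptor.
 */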
1923 static int vduse_open(struct inode *inode, struct file *file)
1924 {
1925         struct vduse_control *control;
1926
1927         control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1928         if (!control)
1929                 return -ENOMEM;
1930
1931         control->api_version = VDUSE_API_VERSION;
1932         file->private_data = control;
1933
1934         return 0;
1935 }
1936
1937 static const struct file_operations vduse_ctrl_fops = {
1938         .owner          = THIS_MODULE,
1939         .open           = vduse_open,
1940         .release        = vduse_release,
1941         .unlocked_ioctl = vduse_ioctl,
1942         .compat_ioctl   = compat_ptr_ioctl,
1943         .llseek         = noop_llseek,
1944 };
1945
1946 struct vduse_mgmt_dev {
1947         struct vdpa_mgmt_dev mgmt_dev;
1948         struct device dev;
1949 };
1950
1951 static struct vduse_mgmt_dev *vduse_mgmt;
1952
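/*
 * Allocate the vdpa device wrapping this VDUSE instance, route its DMA
 * operations through the VDUSE IOVA/bounce domain, and attach it to the
 * "vduse" management device.
 */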
1953 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
1954 {
1955         struct vduse_vdpa *vdev;
1956         int ret;
1957
1958         if (dev->vdev)
1959                 return -EEXIST;
1960
1961         vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
1962                                  &vduse_vdpa_config_ops, 1, 1, name, true);
1963         if (IS_ERR(vdev))
1964                 return PTR_ERR(vdev);
1965
1966         dev->vdev = vdev;
1967         vdev->dev = dev;
1968         vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
1969         ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
1970         if (ret) {
1971                 put_device(&vdev->vdpa.dev);
1972                 return ret;
1973         }
1974         set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
1975         vdev->vdpa.dma_dev = &vdev->vdpa.dev;
1976         vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
1977
1978         return 0;
1979 }
1980
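/*
 * .dev_add callback of the management device (e.g. "vdpa dev add name foo
 * mgmtdev vduse"): the named VDUSE device must exist and be fully
 * initialized by userspace; the IOVA domain is created on first use with
 * the current bounce_size.
 */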
1981 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
1982                         const struct vdpa_dev_set_config *config)
1983 {
1984         struct vduse_dev *dev;
1985         int ret;
1986
1987         mutex_lock(&vduse_lock);
1988         dev = vduse_find_dev(name);
1989         if (!dev || !vduse_dev_is_ready(dev)) {
1990                 mutex_unlock(&vduse_lock);
1991                 return -EINVAL;
1992         }
1993         ret = vduse_dev_init_vdpa(dev, name);
1994         mutex_unlock(&vduse_lock);
1995         if (ret)
1996                 return ret;
1997
1998         mutex_lock(&dev->domain_lock);
1999         if (!dev->domain)
2000                 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
2001                                                   dev->bounce_size);
2002         mutex_unlock(&dev->domain_lock);
2003         if (!dev->domain) {
2004                 put_device(&dev->vdev->vdpa.dev);
2005                 return -ENOMEM;
2006         }
2007
2008         ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
2009         if (ret) {
2010                 put_device(&dev->vdev->vdpa.dev);
2011                 mutex_lock(&dev->domain_lock);
2012                 vduse_domain_destroy(dev->domain);
2013                 dev->domain = NULL;
2014                 mutex_unlock(&dev->domain_lock);
2015                 return ret;
2016         }
2017
2018         return 0;
2019 }
2020
2021 static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
2022 {
2023         _vdpa_unregister_device(dev);
2024 }
2025
2026 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
2027         .dev_add = vdpa_dev_add,
2028         .dev_del = vdpa_dev_del,
2029 };
2030
2031 static struct virtio_device_id id_table[] = {
2032         { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
2033         { 0 },
2034 };
2035
2036 static void vduse_mgmtdev_release(struct device *dev)
2037 {
2038         struct vduse_mgmt_dev *mgmt_dev;
2039
2040         mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
2041         kfree(mgmt_dev);
2042 }
2043
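/* Register the "vduse" vdpa management device and its parent struct device. */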
2044 static int vduse_mgmtdev_init(void)
2045 {
2046         int ret;
2047
2048         vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
2049         if (!vduse_mgmt)
2050                 return -ENOMEM;
2051
2052         ret = dev_set_name(&vduse_mgmt->dev, "vduse");
2053         if (ret) {
2054                 kfree(vduse_mgmt);
2055                 return ret;
2056         }
2057
2058         vduse_mgmt->dev.release = vduse_mgmtdev_release;
2059
2060         ret = device_register(&vduse_mgmt->dev);
2061         if (ret)
2062                 goto dev_reg_err;
2063
2064         vduse_mgmt->mgmt_dev.id_table = id_table;
2065         vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
2066         vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
2067         ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
2068         if (ret)
2069                 device_unregister(&vduse_mgmt->dev);
2070
2071         return ret;
2072
2073 dev_reg_err:
2074         put_device(&vduse_mgmt->dev);
2075         return ret;
2076 }
2077
2078 static void vduse_mgmtdev_exit(void)
2079 {
2080         vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
2081         device_unregister(&vduse_mgmt->dev);
2082 }
2083
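/*
 * Module init: register the vduse class and char device region, create
 * /dev/vduse/control and the per-device cdev, allocate the interrupt
 * injection workqueues, and set up IOVA domain support and the management
 * device.  Everything is unwound in reverse order on failure.
 */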
2084 static int vduse_init(void)
2085 {
2086         int ret;
2087         struct device *dev;
2088
2089         ret = class_register(&vduse_class);
2090         if (ret)
2091                 return ret;
2092
2093         ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
2094         if (ret)
2095                 goto err_chardev_region;
2096
2097         /* /dev/vduse/control */
2098         cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
2099         vduse_ctrl_cdev.owner = THIS_MODULE;
2100         ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
2101         if (ret)
2102                 goto err_ctrl_cdev;
2103
2104         dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
2105         if (IS_ERR(dev)) {
2106                 ret = PTR_ERR(dev);
2107                 goto err_device;
2108         }
2109
2110         /* /dev/vduse/$DEVICE */
2111         cdev_init(&vduse_cdev, &vduse_dev_fops);
2112         vduse_cdev.owner = THIS_MODULE;
2113         ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
2114                        VDUSE_DEV_MAX - 1);
2115         if (ret)
2116                 goto err_cdev;
2117
2118         ret = -ENOMEM;
2119         vduse_irq_wq = alloc_workqueue("vduse-irq",
2120                                 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
2121         if (!vduse_irq_wq)
2122                 goto err_wq;
2123
2124         vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
2125         if (!vduse_irq_bound_wq)
2126                 goto err_bound_wq;
2127
2128         ret = vduse_domain_init();
2129         if (ret)
2130                 goto err_domain;
2131
2132         ret = vduse_mgmtdev_init();
2133         if (ret)
2134                 goto err_mgmtdev;
2135
2136         return 0;
2137 err_mgmtdev:
2138         vduse_domain_exit();
2139 err_domain:
2140         destroy_workqueue(vduse_irq_bound_wq);
2141 err_bound_wq:
2142         destroy_workqueue(vduse_irq_wq);
2143 err_wq:
2144         cdev_del(&vduse_cdev);
2145 err_cdev:
2146         device_destroy(&vduse_class, vduse_major);
2147 err_device:
2148         cdev_del(&vduse_ctrl_cdev);
2149 err_ctrl_cdev:
2150         unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2151 err_chardev_region:
2152         class_unregister(&vduse_class);
2153         return ret;
2154 }
2155 module_init(vduse_init);
2156
2157 static void vduse_exit(void)
2158 {
2159         vduse_mgmtdev_exit();
2160         vduse_domain_exit();
2161         destroy_workqueue(vduse_irq_bound_wq);
2162         destroy_workqueue(vduse_irq_wq);
2163         cdev_del(&vduse_cdev);
2164         device_destroy(&vduse_class, vduse_major);
2165         cdev_del(&vduse_ctrl_cdev);
2166         unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2167         class_unregister(&vduse_class);
2168 }
2169 module_exit(vduse_exit);
2170
2171 MODULE_LICENSE(DRV_LICENSE);
2172 MODULE_AUTHOR(DRV_AUTHOR);
2173 MODULE_DESCRIPTION(DRV_DESC);